diff --git a/Cargo.lock b/Cargo.lock index b2156ab4..1d15c354 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1160,6 +1160,7 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", + "siphasher", "syslog", "tempfile", "tokio", @@ -1367,6 +1368,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "slab" version = "0.4.9" diff --git a/Cargo.toml b/Cargo.toml index 9e3db16e..3044132d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ rpki = { version = "0.17.2", features = [ "repository", "rrdp", "rtr" rustls-pemfile = "1" serde = { version = "1.0.95", features = [ "derive" ] } serde_json = "1.0.57" +siphasher = "0.3.10" tempfile = "3.1.0" tokio = { version = "1.24", features = [ "io-util", "macros", "process", "rt", "rt-multi-thread", "signal", "sync" ] } tokio-rustls = "0.24.1" @@ -44,7 +45,7 @@ routinator-ui = { version = "0.3.4", optional = true } [target.'cfg(unix)'.dependencies] -nix = { version = "0.27.1", features = ["fs", "net", "process", "socket", "user"] } +nix = { version = "0.27.1", features = ["fs", "mman", "net", "process", "socket", "user"] } syslog = "6" [features] diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 4d6d476f..4082f0b3 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -140,9 +140,9 @@ checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytes" @@ -168,9 +168,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "arbitrary", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.3" +version = "4.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84ed82781cea27b43c9b106a979fe450a13a31aab0500595fb3fc06616de08e6" +checksum = "b1d7b8d5ec32af0fadc644bf1fd509a688c2103b185644bb1e29d164e0703136" dependencies = [ "clap_builder", "clap_derive", @@ -194,9 +194,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.2" +version = "4.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" +checksum = "5179bb514e4d7c2051749d8fcefa2ed6d06a9f4e6d69faf3805f5d80b8cf8d56" dependencies = [ "anstream", "anstyle", @@ -540,9 +540,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" [[package]] name = "hostname" @@ -680,17 +680,6 @@ dependencies = [ "hashbrown 0.14.0", ] -[[package]] -name = "io-lifetimes" -version = 
"1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] - [[package]] name = "ipnet" version = "2.8.0" @@ -738,12 +727,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "linux-raw-sys" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" - [[package]] name = "linux-raw-sys" version = "0.4.7" @@ -781,9 +764,9 @@ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memoffset" -version = "0.7.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] @@ -816,15 +799,14 @@ dependencies = [ [[package]] name = "nix" -version = "0.26.4" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cfg-if", "libc", "memoffset", - "pin-utils", ] [[package]] @@ -902,9 +884,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] @@ -1043,10 +1025,9 @@ dependencies = [ [[package]] name = "routinator" -version = "0.13.0-dev" +version = "0.13.1-dev" dependencies = [ "arbitrary", - "bcder", "bytes", "chrono", "clap", @@ -1064,10 +1045,11 @@ dependencies = [ "reqwest", "ring", "routinator-ui", - "rpki 0.17.1", + "rpki", "rustls-pemfile", "serde", "serde_json", + "siphasher", "syslog", "tempfile", "tokio", @@ -1081,9 +1063,11 @@ dependencies = [ name = "routinator-fuzz" version = "0.0.0" dependencies = [ + "arbitrary", "libfuzzer-sys", "routinator", - "rpki 0.17.2-dev", + "rpki", + "tempfile", ] [[package]] @@ -1099,9 +1083,9 @@ dependencies = [ [[package]] name = "rpki" -version = "0.17.1" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2e2cf92592175551ef134dba1b30f8d1526479e680399d3a1eef27136023373" +checksum = "98a05b958a41ba8c923cf14bd2ad5f1aca3f3509c8ffd147c36e094346a0290b" dependencies = [ "arbitrary", "base64", @@ -1120,27 +1104,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "rpki" -version = "0.17.2-dev" -source = "git+https://github.com/NLnetLabs/rpki-rs.git#026ed37ec736ad74d6f8cd6bcf09c472f78ed7b8" -dependencies = [ - "base64", - "bcder", - "bytes", - "chrono", - "futures-util", - "log", - "quick-xml", - "ring", - "serde", - "serde_json", - "tokio", - "tokio-stream", - "untrusted", - "uuid", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1149,28 +1112,14 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" -version = "0.37.23" +version = "0.38.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.3.8", - "windows-sys", -] - -[[package]] -name = "rustix" -version = "0.38.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" +checksum = "747c788e9ce8e92b12cd485c49ddf90723550b654b32508f979b71a7b1ecda4f" dependencies = [ "bitflags 2.4.0", "errno", "libc", - "linux-raw-sys 0.4.7", + "linux-raw-sys", "windows-sys", ] @@ -1197,9 +1146,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.5" +version = "0.101.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45a27e3b59326c16e23d30aeb7a36a24cc0d29e71d68ff611cdfb4a01d013bed" +checksum = "3c7d5dece342910d9ba34d259310cae3e0154b873b35408b787b59bce53d34fe" dependencies = [ "ring", "untrusted", @@ -1243,9 +1192,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.106" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ "itoa", "ryu", @@ -1273,6 +1222,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -1284,9 +1239,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" [[package]] name = "socket2" @@ -1322,9 +1277,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "2.0.32" +version = "2.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" dependencies = [ "proc-macro2", "quote", @@ -1364,17 +1319,17 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix 0.38.13", + "rustix", "windows-sys", ] [[package]] name = "terminal_size" -version = "0.2.6" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" dependencies = [ - "rustix 0.37.23", + "rustix", "windows-sys", ] @@ -1400,9 +1355,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +checksum = "426f806f4089c493dcac0d24c29c01e2c38baf8e30f1b716ee37e83d200b18fe" dependencies = [ "deranged", "itoa", @@ -1415,15 +1370,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = 
"ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" dependencies = [ "time-core", ] @@ -1507,9 +1462,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" dependencies = [ "bytes", "futures-core", @@ -1527,9 +1482,9 @@ checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" [[package]] name = "toml_edit" -version = "0.19.15" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +checksum = "8ff63e60a958cefbb518ae1fd6566af80d9d4be430a33f3723dfc47d1d411d95" dependencies = [ "indexmap 2.0.0", "toml_datetime", diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index a30f8cae..fac238f0 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -8,8 +8,10 @@ edition = "2021" cargo-fuzz = true [dependencies] +arbitrary = "1" libfuzzer-sys = "0.4" -rpki = { git = "https://github.com/NLnetLabs/rpki-rs.git", features = [ "repository", "rrdp", "rtr", "serde", "slurm" ] } +rpki = { version = "0.17.1", features = [ "repository", "rrdp", "rtr", "serde", "slurm" ] } +tempfile = "3.1.0" [dependencies.routinator] path = ".." @@ -22,13 +24,18 @@ members = ["."] [profile.release] debug = 1 +[[bin]] +name = "archive" +path = "fuzz_targets/archive.rs" +test = false +doc = false + [[bin]] name = "construct_delta" path = "fuzz_targets/construct_delta.rs" test = false doc = false - [[bin]] name = "merge_deltas" path = "fuzz_targets/merge_deltas.rs" diff --git a/fuzz/fuzz_targets/archive.rs b/fuzz/fuzz_targets/archive.rs new file mode 100644 index 00000000..e7739b07 --- /dev/null +++ b/fuzz/fuzz_targets/archive.rs @@ -0,0 +1,105 @@ +#![no_main] + +use arbitrary::Arbitrary; +use std::collections::HashMap; +use libfuzzer_sys::fuzz_target; +use routinator::utils::archive::{ + AccessError, Archive, ArchiveError, ObjectMeta, PublishError, + StorageRead, StorageWrite, +}; + +#[derive(Arbitrary, Clone, Debug)] +enum Op { + Publish { name: Vec, data: Vec }, + Update { name: Vec, data: Vec }, + Delete { name: Vec }, +} + +struct Meta; + +impl ObjectMeta for Meta { + const SIZE: usize = 4; + type ConsistencyError = (); + + fn write( + &self, write: &mut StorageWrite + ) -> Result<(), ArchiveError> { + write.write(b"abcd") + } + + fn read( + read: &mut StorageRead + ) -> Result { + let slice = read.read_slice(4).unwrap(); + assert_eq!(slice.as_ref(), b"abcd"); + Ok(Meta) + } +} + +fn check_archive( + archive: &Archive, + content: &HashMap, Vec>, +) { + archive.verify().unwrap(); + let mut content = content.clone(); + for item in archive.objects().unwrap() { + let (name, _, data) = item.unwrap(); + assert_eq!( + content.remove(name.as_ref()).as_ref().map(|x| x.as_slice()), + Some(data.as_ref()) + ); + } + assert!(content.is_empty()); +} + +fn run_archive(ops: impl IntoIterator) { + let mut archive = Archive::create_with_file( + tempfile::tempfile().unwrap() + ).unwrap(); + let mut content = HashMap::new(); + + for item in ops { + match item { + 
Op::Publish { name, data } => { + if name.is_empty() { continue } + let res = archive.publish(name.as_ref(), &Meta, data.as_ref()); + if content.contains_key(&name) { + assert!(matches!(res, Err(PublishError::AlreadyExists))) + } + else { + content.insert(name, data); + assert!(matches!(res, Ok(()))); + } + } + Op::Update { name, data } => { + if name.is_empty() { continue } + let res = archive.update( + name.as_ref(), &Meta, data.as_ref(), |_| Ok(()) + ); + if content.contains_key(&name) { + content.insert(name, data); + assert!(matches!(res, Ok(()))); + } + else { + assert!(matches!(res, Err(AccessError::NotFound))) + } + } + Op::Delete { name } => { + if name.is_empty() { continue } + let res = archive.delete(name.as_ref(), |_| Ok(())); + if content.remove(name.as_slice()).is_some() { + assert!(matches!(res, Ok(()))) + } + else { + assert!(matches!(res, Err(AccessError::NotFound))) + } + } + } + + check_archive(&archive, &content); + } +} + +fuzz_target!{|actions: Vec<Op>| { + run_archive(actions) }} diff --git a/fuzz/fuzz_targets/construct_delta.rs b/fuzz/fuzz_targets/construct_delta.rs index bacc0c38..161af2db 100644 --- a/fuzz/fuzz_targets/construct_delta.rs +++ b/fuzz/fuzz_targets/construct_delta.rs @@ -47,10 +47,10 @@ fuzz_target!{|data: (PayloadSnapshot, PayloadSnapshot, Serial)| { assert_eq!(delta_keys, set_keys); let old_aspas: HashMap<_, _> = old.aspas().map(|x| { - ((x.0.customer, x.0.afi), x.0.providers.clone()) + (x.0.customer, x.0.providers.clone()) }).collect(); let new_aspas: HashMap<_, _> = new.aspas().map(|x| { - ((x.0.customer, x.0.afi), x.0.providers.clone()) + (x.0.customer, x.0.providers.clone()) }).collect(); let delta_aspas: Vec<_> = delta.aspa_actions().map(|x| (x.0.clone(), x.1)).collect(); @@ -60,12 +60,12 @@ fuzz_target!{|data: (PayloadSnapshot, PayloadSnapshot, Serial)| { return None } } - Some((Aspa::new(key.0, key.1, val.clone()), Action::Announce)) + Some((Aspa::new(*key, val.clone()), Action::Announce)) }).chain( old_aspas.keys().filter_map(|key| { if !new_aspas.contains_key(key) { Some(( - Aspa::new(key.0, key.1, ProviderAsns::empty()), + Aspa::new(*key, ProviderAsns::empty()), Action::Withdraw )) } diff --git a/src/collector/base.rs b/src/collector/base.rs index 3c97e1ee..92c7842c 100644 --- a/src/collector/base.rs +++ b/src/collector/base.rs @@ -4,12 +4,13 @@ use std::collections::HashSet; use std::path::Path; +use std::sync::Arc; use bytes::Bytes; use log::info; use rpki::repository::tal::TalUri; use rpki::uri; use crate::config::{Config, FallbackPolicy}; -use crate::error::Failed; +use crate::error::{Failed, Fatal, RunFailed}; use crate::metrics::Metrics; use crate::engine::CaCert; use super::{rrdp, rsync}; @@ -80,6 +81,17 @@ impl Collector { Ok(()) } + /// Sanitizes the stored data. + pub fn sanitize(&self) -> Result<(), Fatal> { + if let Some(rrdp) = self.rrdp.as_ref() { + rrdp.sanitize()?; + } + if let Some(rsync) = self.rsync.as_ref() { + rsync.sanitize()?; + } + Ok(()) + } + /// Starts a new validation run using this collector. pub fn start(&self) -> Run { Run::new(self) } @@ -178,7 +190,7 @@ impl<'a> Run<'a> { /// `Ok(None)`. pub fn repository<'s>( &'s self, ca: &'s CaCert - ) -> Result<Option<Repository<'s>>, Failed> { + ) -> Result<Option<Repository<'s>>, RunFailed> { // See if we should and can use RRDP if let Some(rrdp_uri) = ca.rpki_notify() { if let Some(ref rrdp) = self.rrdp { @@ -273,7 +285,7 @@ enum RepoInner<'a> { /// The repository is accessed via RRDP. Rrdp { /// The repository. - repository: rrdp::Repository, + repository: Arc<rrdp::Repository>, }, /// The repository is accessed via rsync.
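
Note on the new `archive` fuzz target above: it is a model-based harness. Each `Op` produced by `Arbitrary` is applied to the real `Archive` while a plain `HashMap` is updated as an oracle, and `check_archive` verifies the archive against that oracle after every operation. A fixed sequence of the same `Op` values also gives a deterministic smoke test of the harness; the sketch below uses hypothetical values that are not part of the diff and would live inside the same target file:

```rust
// Hypothetical round trip through the harness above: publish, update and
// delete a single entry. run_archive checks the archive against the model
// after every step, so this must end with an empty, consistent archive.
fn archive_smoke_test() {
    run_archive(vec![
        Op::Publish { name: b"demo".to_vec(), data: b"v1".to_vec() },
        Op::Update { name: b"demo".to_vec(), data: b"v2".to_vec() },
        Op::Delete { name: b"demo".to_vec() },
    ]);
}
```

With the new `[[bin]]` section in fuzz/Cargo.toml, the target is invoked like the existing ones, e.g. `cargo fuzz run archive`.
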
@@ -285,7 +297,7 @@ impl<'a> Repository<'a> { /// Creates a RRDP repository. - fn rrdp(repository: rrdp::Repository) -> Self { + fn rrdp(repository: Arc<rrdp::Repository>) -> Self { Repository(RepoInner::Rrdp { repository }) } @@ -307,7 +319,7 @@ impl<'a> Repository<'a> { /// information and returns `None`. pub fn load_object( &self, uri: &uri::Rsync - ) -> Result<Option<Bytes>, Failed> { + ) -> Result<Option<Bytes>, RunFailed> { match self.0 { RepoInner::Rrdp { ref repository } => { repository.load_object(uri) diff --git a/src/collector/rrdp.rs b/src/collector/rrdp.rs deleted file mode 100644 index 523a0f9f..00000000 --- a/src/collector/rrdp.rs +++ /dev/null @@ -1,2688 +0,0 @@ -//! Local repository copies synchronized with RRDP. -//! -//! The RRDP collector uses the file system to store its data. For each -//! hostname serving an RRDP repository, there is a directory. Within these -//! directories, each repository has its own directory based on the SHA-256 -//! hash of the full rpkiNotify URI. Within this directory, all objects -//! published by the RRDP server are stored in a (relative) path constructed -//! from all the components of their rsync URI. The first of these is indeed -//! `rsync`. -//! -//! During updates, all newly published objects are stored in a temporary -//! tree alongside the actual object tree. The files are also stored in paths -//! built from their rsync URI, but the first component `rsync` is replaced -//! by `tmp`. -//! -//! For each repository, the state at last update is stored in a file named -//! `state.bin` placed in the repository directory. This file is removed before -//! any update is attempted to mark the repository as ‘in flux.’ Similarly, -//! if this file is not found before an update is started, the repository is -//! considered not present even if there are actually files. - -use std::{cmp, error, fmt, fs, io}; -use std::collections::{HashMap, HashSet}; -use std::fs::File; -use std::io::{Seek, SeekFrom, Write}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use bytes::Bytes; -use chrono::{DateTime, TimeZone, Utc}; -use log::{debug, error, info, warn}; -use rand::Rng; -use ring::digest; -use ring::constant_time::verify_slices_are_equal; -use reqwest::header; -use reqwest::{Certificate, Proxy, StatusCode}; -use reqwest::blocking::{Client, ClientBuilder, RequestBuilder, Response}; -use rpki::{rrdp, uri}; -use rpki::crypto::DigestAlgorithm; -use rpki::rrdp::{DeltaInfo, NotificationFile, ProcessDelta, ProcessSnapshot}; -use uuid::Uuid; -use crate::config::Config; -use crate::error::Failed; -use crate::metrics::{Metrics, RrdpRepositoryMetrics}; -use crate::utils::fatal; -use crate::utils::binio::{Compose, Parse}; -use crate::utils::date::{parse_http_date, format_http_date}; -use crate::utils::dump::DumpRegistry; -use crate::utils::json::JsonBuilder; -use crate::utils::sync::{Mutex, RwLock}; -use crate::utils::uri::UriExt; - - -//------------ Collector ----------------------------------------------------- - -/// The local copy of RPKI repositories synchronized via RRDP. -#[derive(Debug)] -pub struct Collector { - /// The path of the directory we store all our data in. - working_dir: PathBuf, - - /// The HTTP client. - http: HttpClient, - - /// Whether to filter dubious authorities in notify URIs. - filter_dubious: bool, - - /// RRDP repository fallback timeout. - /// - /// This is the time since the last known update of an RRDP repository - /// before it is considered non-existent.
- fallback_time: FallbackTime, - - /// The maximum allowed size for published objects. - max_object_size: Option, - - /// The maximum number of deltas we process before using a snapshot. - max_delta_count: usize, -} - -impl Collector { - /// Initializes the RRDP collector without creating a value. - /// - /// This function is called implicitely by [`new`][Collector::new]. - pub fn init(config: &Config) -> Result<(), Failed> { - let _ = Self::create_working_dir(config)?; - Ok(()) - } - - /// Creates the working dir and returns its path. - fn create_working_dir(config: &Config) -> Result { - let working_dir = config.cache_dir.join("rrdp"); - - if config.fresh { - if let Err(err) = fs::remove_dir_all(&working_dir) { - if err.kind() != io::ErrorKind::NotFound { - error!( - "Failed to delete RRDP working directory at {}: {}", - working_dir.display(), err - ); - return Err(Failed) - } - } - } - - if let Err(err) = fs::create_dir_all(&working_dir) { - error!( - "Failed to create RRDP working directory {}: {}.", - working_dir.display(), err - ); - return Err(Failed); - } - Ok(working_dir) - } - /// Creates a new RRDP collector. - pub fn new(config: &Config) -> Result, Failed> { - if config.disable_rrdp { - return Ok(None) - } - Ok(Some(Collector { - working_dir: Self::create_working_dir(config)?, - http: HttpClient::new(config)?, - filter_dubious: !config.allow_dubious_hosts, - fallback_time: FallbackTime::from_config(config), - max_object_size: config.max_object_size, - max_delta_count: config.rrdp_max_delta_count, - })) - } - - /// Ignites the collector. - pub fn ignite(&mut self) -> Result<(), Failed> { - self.http.ignite() - } - - /// Starts a validation run using the collector. - pub fn start(&self) -> Run { - Run::new(self) - } - - /// Dumps the content of the RRDP collector. - #[allow(clippy::mutable_key_type)] - pub fn dump(&self, dir: &Path) -> Result<(), Failed> { - let dir = dir.join("rrdp"); - debug!("Dumping RRDP collector content to {}", dir.display()); - let mut registry = DumpRegistry::new(dir); - let mut states = HashMap::new(); - for entry in fatal::read_dir(&self.working_dir)? { - let entry = entry?; - if !entry.is_dir() { - continue; - } - for entry in fatal::read_dir(entry.path())? { - let entry = entry?; - if entry.is_dir() { - self.dump_repository( - entry.path(), &mut registry, &mut states - )?; - } - } - } - self.dump_repository_json(registry, states)?; - debug!("RRDP collector dump complete."); - Ok(()) - } - - /// Dumps the content of an RRDP repository. - #[allow(clippy::mutable_key_type)] - fn dump_repository( - &self, - repo_path: &Path, - registry: &mut DumpRegistry, - state_registry: &mut HashMap, - ) -> Result<(), Failed> { - let state_path = repo_path.join(RepositoryState::FILE_NAME); - let state = match RepositoryState::load_path(&state_path)? { - Some(state) => state, - None => return Ok(()) - }; - let target_path = registry.get_repo_path(Some(&state.rpki_notify)); - - fatal::create_dir_all(&target_path)?; - - Self::dump_tree(&repo_path.join("rsync"), &target_path)?; - - state_registry.insert(state.rpki_notify.clone(), state); - - Ok(()) - } - - /// Dumps a tree. - fn dump_tree( - source_path: &Path, - target_path: &Path, - ) -> Result<(), Failed> { - for entry in fatal::read_dir(source_path)? 
{ - let entry = entry?; - if entry.is_dir() { - Self::dump_tree( - entry.path(), &target_path.join(entry.file_name()) - )?; - } - else if entry.is_file() { - let target_path = target_path.join(entry.file_name()); - fatal::create_parent_all(&target_path)?; - RepositoryObject::dump(entry.path(), &target_path)?; - } - } - Ok(()) - } - - /// Dumps the repositories.json. - #[allow(clippy::mutable_key_type)] - fn dump_repository_json( - &self, - repos: DumpRegistry, - states: HashMap, - ) -> Result<(), Failed> { - let path = repos.base_dir().join("repositories.json"); - if let Err(err) = fs::write( - &path, - JsonBuilder::build(|builder| { - builder.member_array("repositories", |builder| { - for (key, value) in repos.rrdp_uris() { - builder.array_object(|builder| { - builder.member_str( - "path", value - ); - builder.member_str("type", "rrdp"); - builder.member_str( - "rpkiNotify", - key - ); - - if let Some(state) = states.get(key) { - builder.member_raw("serial", state.serial); - builder.member_str("session", state.session); - if let Some(updated) = state.updated() { - builder.member_str( - "updated", - updated.to_rfc3339() - ); - } - } - }) - } - builder.array_object(|builder| { - builder.member_str("path", "rsync"); - builder.member_str("type", "rsync"); - }); - }) - }) - ) { - error!( "Failed to write {}: {}", path.display(), err); - return Err(Failed) - } - - Ok(()) - } - - /// Returns the path for a repository. - fn repository_path(&self, rpki_notify: &uri::Https) -> PathBuf { - let authority = rpki_notify.canonical_authority(); - let alg = DigestAlgorithm::sha256(); - let mut dir = String::with_capacity( - authority.len() - + alg.digest_len() - + 1 // one slash - ); - dir.push_str(&authority); - dir.push('/'); - crate::utils::str::append_hex( - alg.digest(rpki_notify.as_slice()).as_ref(), - &mut dir - ); - self.working_dir.join(dir) - } - -} - - -//------------ Run ----------------------------------------------------------- - -/// Using the collector for a single validation run. -#[derive(Debug)] -pub struct Run<'a> { - /// A reference to the underlying collector. - collector: &'a Collector, - - /// A set of the repositories we have updated already. - updated: RwLock>, - - /// The modules that are currently being updated. - /// - /// The value in the map is a mutex that is used to synchronize competing - /// attempts to update the module. Only the thread that has the mutex is - /// allowed to actually update. - running: RwLock>>>, - - /// The server metrics. - metrics: Mutex>, -} - -impl<'a> Run<'a> { - /// Creates a new runner. - fn new(collector: &'a Collector) -> Self { - Run { - collector, - updated: Default::default(), - running: Default::default(), - metrics: Mutex::new(Vec::new()), - } - } - - /// Loads a trust anchor certificate identified by an HTTPS URI. - /// - /// This just downloads the file. It is not cached since that is done - /// by the store anyway. - pub fn load_ta(&self, uri: &uri::Https) -> Option { - let mut response = match self.collector.http.response(uri, false) { - Ok(response) => response, - Err(_) => return None, - }; - if response.content_length() > self.collector.max_object_size { - warn!( - "Trust anchor certificate {} exceeds size limit. \ - Ignoring.", - uri - ); - return None - } - let mut bytes = Vec::new(); - if let Err(err) = response.copy_to(&mut bytes) { - info!("Failed to get trust anchor {}: {}", uri, err); - return None - } - Some(Bytes::from(bytes)) - } - - /// Returns whether an RRDP repository has been updated already. 
- /// - /// This does not mean the repository is actually up-to-date or even - /// available as an update may have failed. - pub fn was_updated(&self, notify_uri: &uri::Https) -> bool { - self.updated.read().get(notify_uri).is_some() - } - - /// Accesses an RRDP repository. - /// - /// This method blocks if the repository is deemed to need updating until - /// the update has finished. - /// - /// Returns the result of the update of the repository and whether this - /// is the first attempt at updating the repository. - pub fn load_repository( - &self, rpki_notify: &uri::Https - ) -> Result<(LoadResult, bool), Failed> { - // If we already tried updating, we can return already. - if let Some(repo) = self.updated.read().get(rpki_notify) { - return Ok((repo.clone(), false)) - } - - // Get a clone of the (arc-ed) mutex. Make a new one if there isn’t - // yet. - let mutex = { - self.running.write() - .entry(rpki_notify.clone()).or_default() - .clone() - }; - - // Acquire the mutex. Once we have it, see if the repository is - // up-to-date which happens if someone else had the mutex first. - let _lock = mutex.lock(); - if let Some(res) = self.updated.read().get(rpki_notify) { - return Ok((res.clone(), false)) - } - - // Now we can update the repository. - let repository = Repository::try_update(self, rpki_notify.clone())?; - - // Remove from running. - self.running.write().remove(rpki_notify); - - // Insert into updated map and also return. - self.updated.write().insert( - rpki_notify.clone(), repository.clone() - ); - Ok((repository, true)) - } - - /// Cleans up the RRDP collector. - /// - /// Deletes all RRDP repository trees that are not included in `retain`. - #[allow(clippy::mutable_key_type)] - pub fn cleanup( - &self, - retain: &mut HashSet - ) -> Result<(), Failed> { - // Add all the RRDP repositories we’ve tried during this run to be - // kept. - for uri in self.updated.read().keys() { - retain.insert(uri.clone()); - } - - for entry in fatal::read_dir(&self.collector.working_dir)? { - let entry = entry?; - if entry.is_file() { - // This isn’t supposed to be here. Make it go away. - if let Err(err) = fs::remove_file(entry.path()) { - error!( - "Fatal: failed to delete stray file {}: {}", - entry.path().display(), err - ); - return Err(Failed) - } - } - else if entry.is_dir() { - self.cleanup_authority(entry.path(), retain)?; - } - } - Ok(()) - } - - /// Cleans up an authority directory. - #[allow(clippy::mutable_key_type)] - pub fn cleanup_authority( - &self, - path: &Path, - retain: &HashSet - ) -> Result<(), Failed> { - for entry in fatal::read_dir(path)? { - let entry = entry?; - if entry.is_file() { - // This isn’t supposed to be here. Make it go away. - if let Err(err) = fs::remove_file(entry.path()) { - error!( - "Fatal: failed to delete stray file {}: {}", - entry.path().display(), err - ); - return Err(Failed) - } - } - else if entry.is_dir() { - self.cleanup_repository(entry.path(), retain)?; - } - } - Ok(()) - } - - /// Cleans up a repository directory. - #[allow(clippy::mutable_key_type)] - pub fn cleanup_repository( - &self, - path: &Path, - retain: &HashSet - ) -> Result<(), Failed> { - let state_path = path.join(RepositoryState::FILE_NAME); - let keep = match RepositoryState::load_path(&state_path)? 
{ - Some(state) => { - retain.contains(&state.rpki_notify) - } - None => false, - }; - - if !keep { - debug!("Deleting unused RRDP tree {}.", path.display()); - if let Err(err) = fs::remove_dir_all(path) { - error!( - "Fatal: failed to delete tree {}: {}.", - path.display(), err - ); - return Err(Failed) - } - } - - Ok(()) - } - - /// Finishes the validation run. - /// - /// Updates `metrics` with the collector run’s metrics. - /// - /// If you are not interested in the metrics, you can simple drop the - /// value, instead. - pub fn done(self, metrics: &mut Metrics) { - metrics.rrdp = self.metrics.into_inner() - } -} - - -//------------ LoadResult ---------------------------------------------------- - -/// The result of trying to load a repository. -#[derive(Clone, Debug)] -pub enum LoadResult { - /// The update failed and there is no local copy. - Unavailable, - - /// The update failed and any content should now be considered stale. - Stale, - - /// The update failed but content should not be considered stale yet. - Current, - - /// The repository was successfully updated. - Updated(Repository), -} - - -//------------ Repository ---------------------------------------------------- - -/// Access to a single RRDP repository. -#[derive(Clone, Debug)] -pub struct Repository { - /// The rpkiNotify URI of the repository. - rpki_notify: uri::Https, - - /// The path where everything from this repository lives. - path: PathBuf, -} - -impl Repository { - /// Loads an object from the repository. - /// - /// The object is identified by its rsync URI. If the object doesn’t - /// exist, returns `None`. - pub fn load_object( - &self, - uri: &uri::Rsync - ) -> Result, Failed> { - RepositoryObject::load(&self.object_path(uri)).map(|maybe_obj| { - maybe_obj.map(|obj| obj.content) - }) - } - - /// Returns the path where all the objects live. - fn object_base(&self) -> PathBuf { - self.path.join("rsync") - } - - /// Returns the path for a given rsync URI. - fn object_path(&self, uri: &uri::Rsync) -> PathBuf { - self.path.join( - format!( - "rsync/{}/{}/{}", - uri.canonical_authority(), - uri.module_name(), - uri.path() - ) - ) - } - - /// Returns the path where all the objects live. - fn tmp_base(&self) -> PathBuf { - self.path.join("tmp") - } - - /// Returns the path for a given rsync URI. - fn tmp_object_path(&self, uri: &uri::Rsync) -> PathBuf { - self.path.join( - format!( - "tmp/{}/{}/{}", - uri.canonical_authority(), - uri.module_name(), - uri.path() - ) - ) - } -} - -/// # Update -/// -impl Repository { - /// Creates the repository by trying to update it. - fn try_update( - run: &Run, rpki_notify: uri::Https - ) -> Result { - // Check if the repository URI is dubious and return early if so. - if run.collector.filter_dubious && rpki_notify.has_dubious_authority() { - warn!( - "{}: Dubious host name. Not using the repository.", - rpki_notify - ); - return Ok(LoadResult::Unavailable) - } - - let path = run.collector.repository_path(&rpki_notify); - let repo = Repository { rpki_notify: rpki_notify.clone(), path }; - let state = match RepositoryState::load(&repo) { - Ok(state) => { - state - } - Err(_) => { - // Try to recover by removing the repository directory and - // starting from scratch. 
- if let Err(err) = fs::remove_dir_all(&repo.path) { - error!( - "Fatal: failed to delete corrupted repository \ - directory {}: {}", - repo.path.display(), err - ); - return Err(Failed) - } - None - } - }; - - let is_current = match state.as_ref() { - Some(state) => !state.is_expired(), - None => false, - }; - let best_before = state.as_ref().and_then(|state| state.best_before()); - - let start_time = SystemTime::now(); - let mut metrics = RrdpRepositoryMetrics::new(rpki_notify.clone()); - let is_updated = repo._update(run, state, &mut metrics)?; - metrics.duration = SystemTime::now().duration_since(start_time); - run.metrics.lock().push(metrics); - - if is_updated { - Ok(LoadResult::Updated(repo)) - } - else if is_current { - Ok(LoadResult::Current) - } - else if let Some(date) = best_before { - info!( - "RRDP {}: Update failed and \ - current copy is expired since {}.", - rpki_notify, date - ); - Ok(LoadResult::Stale) - } - else { - info!( - "RRDP {}: Update failed and there is no current copy.", - rpki_notify - ); - Ok(LoadResult::Unavailable) - } - } - - /// Performs the actual update. - /// - /// Returns `Ok(false)` if the update failed. - fn _update( - &self, - run: &Run, - mut state: Option, - metrics: &mut RrdpRepositoryMetrics, - ) -> Result { - let notify = match run.collector.http.notification_file( - &self.rpki_notify, - state.as_ref(), - &mut metrics.notify_status - ) { - Ok(Some(notify)) => notify, - Ok(None) => { - self.not_modified(run, state.as_mut())?; - return Ok(true) - } - Err(Failed) => { - return Ok(false) - } - }; - - metrics.serial = Some(notify.content.serial()); - metrics.session = Some(notify.content.session_id()); - match self.delta_update(run, state.as_ref(), ¬ify, metrics)? { - None => { - return Ok(true) - } - Some(reason) => { - metrics.snapshot_reason = Some(reason) - } - } - self.snapshot_update(run, ¬ify, metrics) - } - - /// Handle the case of a Not Modified response. - fn not_modified( - &self, - run: &Run, - state: Option<&mut RepositoryState>, - ) -> Result<(), Failed> { - debug!("RRDP {}: Not modified.", self.rpki_notify); - if let Some(state) = state { - state.touch(run.collector.fallback_time); - state.write(self)? - } - Ok(()) - } - - /// Performs a snapshot update and returns whether that succeeded. - /// - /// The URI and expected meta-data of the snapshot file are taken from - /// `notify`. - fn snapshot_update( - &self, - run: &Run, - notify: &Notification, - metrics: &mut RrdpRepositoryMetrics, - ) -> Result { - debug!("RRDP {}: updating from snapshot.", self.rpki_notify); - match SnapshotUpdate::new( - run.collector, self, notify, metrics - ).try_update() { - Ok(()) => { - debug!( - "RRDP {}: snapshot update completed.", - self.rpki_notify - ); - Ok(true) - } - Err(SnapshotError::Fatal) => Err(Failed), - Err(err) => { - warn!( - "RRDP {}: failed to process snapshot file {}: {}", - self.rpki_notify, notify.content.snapshot().uri(), err - ); - Ok(false) - } - } - } - - /// Performs a delta update of the RRDP repository. - /// - /// Takes information of the available deltas from `notify`. May not do - /// anything at all if the repository is up-to-date. Returns whether the - /// update succeeded. If `Ok(Some(reason))` is returned, a snapshot update - /// should be tried next because of the reason given. 
- fn delta_update( - &self, - run: &Run, - state: Option<&RepositoryState>, - notify: &Notification, - metrics: &mut RrdpRepositoryMetrics, - ) -> Result, Failed> { - let state = match state { - Some(state) => state, - None => return Ok(Some(SnapshotReason::NewRepository)), - }; - - let deltas = match self.calc_deltas(¬ify.content, state) { - Ok(deltas) => deltas, - Err(reason) => return Ok(Some(reason)), - }; - - if deltas.len() > run.collector.max_delta_count { - debug!( - "RRDP: {}: Too many delta steps required ({})", - self.rpki_notify, deltas.len() - ); - return Ok(Some(SnapshotReason::TooManyDeltas)) - } - - if !deltas.is_empty() { - let count = deltas.len(); - for (i, info) in deltas.iter().enumerate() { - debug!( - "RRDP {}: Delta update step ({}/{}).", - self.rpki_notify, i + 1, count - ); - if let Some(reason) = DeltaUpdate::new( - run.collector, self, notify.content.session_id(), - info, metrics - ).try_update()? { - info!( - "RRDP {}: Delta update failed, \ - trying snapshot instead.", - self.rpki_notify - ); - return Ok(Some(reason)) - } - } - } - - // We are up-to-date now, so we can replace the state file with one - // reflecting the notification we’ve got originally. This will update - // the etag and last-modified data. - RepositoryState::from_notify( - self.rpki_notify.clone(), - notify, - run.collector.fallback_time - ).write(self)?; - - debug!("RRDP {}: Delta update completed.", self.rpki_notify); - Ok(None) - } - - /// Calculates the slice of deltas to follow for updating. - /// - /// Returns an empty slice if no update is necessary. - /// Returns a non-empty slice of the sequence of deltas to be applied. - /// Returns `None` if updating via deltas is not possible. - fn calc_deltas<'b>( - &self, - notify: &'b NotificationFile, - state: &RepositoryState - ) -> Result<&'b [rrdp::DeltaInfo], SnapshotReason> { - if notify.session_id() != state.session { - debug!("New session. Need to get snapshot."); - return Err(SnapshotReason::NewSession) - } - debug!("{}: Serials: us {}, them {}.", - self.rpki_notify, state.serial, notify.serial() - ); - if notify.serial() == state.serial { - return Ok(&[]); - } - - // If there is no last delta (remember, we have a different - // serial than the notification file) or if the last delta’s - // serial differs from that noted in the notification file, - // bail out. - if notify.deltas().last().map(|delta| delta.serial()) - != Some(notify.serial()) - { - debug!("Last delta serial differs from current serial."); - return Err(SnapshotReason::BadDeltaSet) - } - - let mut deltas = notify.deltas(); - let serial = match state.serial.checked_add(1) { - Some(serial) => serial, - None => return Err(SnapshotReason::LargeSerial) - }; - loop { - let first = match deltas.first() { - Some(first) => first, - None => { - debug!("Ran out of deltas."); - return Err(SnapshotReason::BadDeltaSet) - } - }; - match first.serial().cmp(&serial) { - cmp::Ordering::Greater => { - debug!("First delta is too new ({})", first.serial()); - return Err(SnapshotReason::OutdatedLocal) - } - cmp::Ordering::Equal => break, - cmp::Ordering::Less => deltas = &deltas[1..] - } - } - Ok(deltas) - } -} - - -//------------ SnapshotUpdate ------------------------------------------------ - -/// An update to a repository performed from a snapshot file. -/// -/// For this type of update, we collect all the published objects in the -/// repository’s temp directory and move it over to the object directory upon -/// success. -struct SnapshotUpdate<'a> { - /// The collector. 
- collector: &'a Collector, - - /// The repository. - repository: &'a Repository, - - /// The notification file pointing to the snapshot. - notify: &'a Notification, - - /// The metrics for the update. - metrics: &'a mut RrdpRepositoryMetrics, -} - -impl<'a> SnapshotUpdate<'a> { - pub fn new( - collector: &'a Collector, - repository: &'a Repository, - notify: &'a Notification, - metrics: &'a mut RrdpRepositoryMetrics, - ) -> Self { - SnapshotUpdate { collector, repository, notify, metrics } - } - - pub fn try_update(mut self) -> Result<(), SnapshotError> { - let response = match self.collector.http.response( - self.notify.content.snapshot().uri(), false - ) { - Ok(response) => { - self.metrics.payload_status = Some(response.status().into()); - if response.status() != StatusCode::OK { - return Err(response.status().into()) - } - else { - response - } - } - Err(err) => { - self.metrics.payload_status = Some(HttpStatus::Error); - return Err(err.into()) - } - }; - - let tmp_base = self.repository.tmp_base(); - if let Err(err) = fs::create_dir_all(&tmp_base) { - error!( - "Fatal: failed to create RRDP temporary directory {}: {}", - tmp_base.display(), err - ); - return Err(SnapshotError::Fatal) - } - - match self.try_process(response) { - Ok(()) => { - // Remove the state file to signal we are messing with the - // directory. - RepositoryState::remove(self.repository)?; - - // Delete the old object base and move the tmp base over. - // Note that the old object base may actually be missing. - let object_base = self.repository.object_base(); - if let Err(err) = fs::remove_dir_all(&object_base) { - if err.kind() != io::ErrorKind::NotFound { - error!( - "Fatal: failed to delete RRDP object \ - directory {}: {}", - object_base.display(), err - ); - return Err(SnapshotError::Fatal) - } - } - // We don’t need to ensure presence of the repository directory - // since the tmp_base lives there, too. So this really is - // just a rename. - if let Err(err) = fs::rename(&tmp_base, &object_base) { - error!( - "Fatal: failed to rename {} to {}: {}", - tmp_base.display(), object_base.display(), err - ); - return Err(SnapshotError::Fatal) - } - - // Write the new state. 
- RepositoryState::from_notify( - self.repository.rpki_notify.clone(), - self.notify, - self.collector.fallback_time - ).write(self.repository)?; - - Ok(()) - } - Err(err) => { - if let Err(err) = fs::remove_dir_all(&tmp_base) { - error!( - "Fatal: failed to delete RRDP temporary \ - directory {}:{}", - tmp_base.display(), err - ); - return Err(SnapshotError::Fatal) - } - Err(err) - } - } - } - - pub fn try_process( - &mut self, - response: HttpResponse - ) -> Result<(), SnapshotError> { - let mut reader = io::BufReader::new(HashRead::new(response)); - self.process(&mut reader)?; - let hash = reader.into_inner().into_hash(); - if verify_slices_are_equal( - hash.as_ref(), - self.notify.content.snapshot().hash().as_ref() - ).is_err() { - return Err(SnapshotError::HashMismatch) - } - Ok(()) - } -} - -impl<'a> ProcessSnapshot for SnapshotUpdate<'a> { - type Err = SnapshotError; - - fn meta( - &mut self, - session_id: Uuid, - serial: u64, - ) -> Result<(), Self::Err> { - if session_id != self.notify.content.session_id() { - return Err(SnapshotError::SessionMismatch { - expected: self.notify.content.session_id(), - received: session_id - }) - } - if serial != self.notify.content.serial() { - return Err(SnapshotError::SerialMismatch { - expected: self.notify.content.serial(), - received: serial - }) - } - Ok(()) - } - - fn publish( - &mut self, - uri: uri::Rsync, - data: &mut rrdp::ObjectReader, - ) -> Result<(), Self::Err> { - let path = self.repository.tmp_object_path(&uri); - let mut data = RrdpDataRead::new( - data, &uri, self.collector.max_object_size - ); - RepositoryObject::create(&path, &mut data).map_err(|io_err| { - match data.take_err() { - Some(data_err) => data_err.into(), - None => { - error!("{}", io_err); - SnapshotError::Fatal - } - } - }) - } -} - - -//------------ DeltaUpdate --------------------------------------------------- - -/// An update to a repository performed from a delta file. -/// -/// For this kind of update, we collect newly published and updated objects in -/// the repository’s temp directory and remember them as well as all deleted -/// objects and if everything is okay, copy files over to and delete files in -/// the object directory. -struct DeltaUpdate<'a> { - /// The collector. - collector: &'a Collector, - - /// The repository. - repository: &'a Repository, - - /// The session ID of the RRDP session. - session_id: Uuid, - - /// Information about the delta file. - info: &'a DeltaInfo, - - /// The metrics for the update. - metrics: &'a mut RrdpRepositoryMetrics, - - /// The URIs of objects to be copied from the temp to the object directory. - publish: HashSet, - - /// The URIs of objects to be deleted. - withdraw: HashSet, -} - -impl<'a> DeltaUpdate<'a> { - /// Creates a new delta update. - pub fn new( - collector: &'a Collector, - repository: &'a Repository, - session_id: Uuid, - info: &'a DeltaInfo, - metrics: &'a mut RrdpRepositoryMetrics, - ) -> Self { - DeltaUpdate { - collector, repository, session_id, info, metrics, - publish: Default::default(), withdraw: Default::default(), - } - } - - /// Tries to perform the delta update. 
- pub fn try_update( - mut self - ) -> Result, Failed> { - if let Err(err) = self.collect_changes() { - warn!( - "RRDP {}: failed to process delta: {}", - self.repository.rpki_notify, err - ); - return Ok(Some(SnapshotReason::ConflictingDelta)) - } - self.apply_changes()?; - Ok(None) - } - - fn collect_changes(&mut self) -> Result<(), DeltaError> { - let response = match self.collector.http.response( - self.info.uri(), false - ) { - Ok(response) => { - self.metrics.payload_status = Some(response.status().into()); - if response.status() != StatusCode::OK { - return Err(response.status().into()) - } - else { - response - } - } - Err(err) => { - self.metrics.payload_status = Some(HttpStatus::Error); - return Err(err.into()) - } - }; - self.try_process(response)?; - if let Some(uri) = self.publish.intersection(&self.withdraw).next() { - return Err(DeltaError::ObjectRepeated { uri: uri.clone() }) - } - Ok(()) - } - - /// Applies the collected changes. - /// - /// If anything goes wrong here, we will have to wipe the repository as it - /// will be in an inconsistent state. - fn apply_changes(self) -> Result<(), Failed> { - // First, delete the state file to mark the repository as being in - // flux. - RepositoryState::remove(self.repository)?; - - if self._apply_changes().is_err() { - if let Err(err) = fs::remove_dir_all(&self.repository.path) { - error!( - "Fatal: failed to delete repository directory {}: {}", - self.repository.path.display(), err - ); - } - return Err(Failed) - } - - // Write a state file to reflect how far we’ve come. - RepositoryState::new_for_delta( - self.repository.rpki_notify.clone(), - self.session_id, - self.info.serial(), - self.collector.fallback_time - ).write(self.repository)?; - Ok(()) - } - - /// Actually applies the changes, not dealing with errors. - fn _apply_changes(&self) -> Result<(), Failed> { - for uri in &self.publish { - let tmp_path = self.repository.tmp_object_path(uri); - let obj_path = self.repository.object_path(uri); - if let Err(err) = fs::remove_file(&obj_path) { - if err.kind() != io::ErrorKind::NotFound { - error!( - "Fatal: failed to delete {}: {}", - obj_path.display(), err - ); - return Err(Failed) - } - } - if let Some(parent) = obj_path.parent() { - if let Err(err) = fs::create_dir_all(parent) { - error!( - "Fatal: failed to create directory {}: {}", - parent.display(), err - ); - return Err(Failed) - } - } - if let Err(err) = fs::rename(&tmp_path, &obj_path) { - error!( - "Fatal: failed to move {} to {}: {}", - tmp_path.display(), obj_path.display(), err - ); - return Err(Failed) - } - } - for uri in &self.withdraw { - let obj_path = self.repository.object_path(uri); - if let Err(err) = fs::remove_file(&obj_path) { - if err.kind() != io::ErrorKind::NotFound { - error!( - "Fatal: failed to delete {}: {}", - obj_path.display(), err - ); - return Err(Failed) - } - } - } - Ok(()) - } - - pub fn try_process( - &mut self, - response: HttpResponse - ) -> Result<(), DeltaError> { - let mut reader = io::BufReader::new(HashRead::new(response)); - self.process(&mut reader)?; - let hash = reader.into_inner().into_hash(); - if verify_slices_are_equal( - hash.as_ref(), - self.info.hash().as_ref() - ).is_err() { - return Err(DeltaError::DeltaHashMismatch) - } - Ok(()) - } - - /// Checks whether the object has the given hash. - /// - /// If the hash is `None`, actually checks that the object doesn’t - /// exist. 
- fn check_hash( - &self, uri: &uri::Rsync, expected: Option - ) -> Result<(), DeltaError> { - let current = RepositoryObject::load_hash( - &self.repository.object_path(uri) - )?; - if current == expected { - Ok(()) - } - else if expected.is_none() { - Err(DeltaError::ObjectAlreadyPresent { uri: uri.clone() }) - } - else if current.is_none() { - Err(DeltaError::MissingObject { uri: uri.clone() }) - } - else { - Err(DeltaError::ObjectHashMismatch { uri: uri.clone() }) - } - } -} - -impl<'a> ProcessDelta for DeltaUpdate<'a> { - type Err = DeltaError; - - fn meta( - &mut self, session_id: Uuid, serial: u64 - ) -> Result<(), Self::Err> { - if session_id != self.session_id { - return Err(DeltaError::SessionMismatch { - expected: self.session_id, - received: session_id - }) - } - if serial != self.info.serial() { - return Err(DeltaError::SerialMismatch { - expected: self.info.serial(), - received: serial - }) - } - Ok(()) - } - - fn publish( - &mut self, - uri: uri::Rsync, - hash: Option, - data: &mut rrdp::ObjectReader<'_> - ) -> Result<(), Self::Err> { - self.check_hash(&uri, hash)?; - let mut data = RrdpDataRead::new( - data, &uri, self.collector.max_object_size - ); - let path = self.repository.tmp_object_path(&uri); - RepositoryObject::create(&path, &mut data).map_err(|io_err| { - match data.take_err() { - Some(data_err) => data_err.into(), - None => { - error!("{}", io_err); - DeltaError::Fatal - } - } - })?; - if !self.publish.insert(uri.clone()) { - return Err(DeltaError::ObjectRepeated { uri }) - } - Ok(()) - } - - fn withdraw( - &mut self, - uri: uri::Rsync, - hash: rrdp::Hash - ) -> Result<(), Self::Err> { - self.check_hash(&uri, Some(hash))?; - if !self.withdraw.insert(uri.clone()) { - return Err(DeltaError::ObjectRepeated { uri }) - } - Ok(()) - } -} - - -//------------ HttpClient ---------------------------------------------------- - -/// The HTTP client for updating RRDP repositories. -#[derive(Debug)] -struct HttpClient { - /// The (blocking) reqwest client. - /// - /// This will be of the error variant until `ignite` has been called. Yes, - /// that is not ideal but - client: Result>, - - /// The base directory for storing copies of responses if that is enabled. - response_dir: Option, - - /// The timeout for requests. - timeout: Option, -} - -impl HttpClient { - /// Creates a new, not-yet-ignited client based on the config. - pub fn new(config: &Config) -> Result { - - // Deal with the reqwest’s TLS features by defining a creator - // function for the two cases. - #[cfg(not(feature = "native-tls"))] - fn create_builder() -> ClientBuilder { - Client::builder().use_rustls_tls() - } - - #[cfg(feature = "native-tls")] - fn create_builder() -> ClientBuilder { - Client::builder().use_native_tls() - } - - let mut builder = create_builder(); - builder = builder.user_agent(&config.rrdp_user_agent); - builder = builder.tcp_keepalive(config.rrdp_tcp_keepalive); - builder = builder.timeout(None); // Set per request. - if let Some(timeout) = config.rrdp_connect_timeout { - builder = builder.connect_timeout(timeout); - } - if let Some(addr) = config.rrdp_local_addr { - builder = builder.local_address(addr) - } - for path in &config.rrdp_root_certs { - builder = builder.add_root_certificate( - Self::load_cert(path)? 
- ); - } - for proxy in &config.rrdp_proxies { - let proxy = match Proxy::all(proxy) { - Ok(proxy) => proxy, - Err(err) => { - error!( - "Invalid rrdp-proxy '{}': {}", proxy, err - ); - return Err(Failed) - } - }; - builder = builder.proxy(proxy); - } - Ok(HttpClient { - client: Err(Some(builder)), - response_dir: config.rrdp_keep_responses.clone(), - timeout: config.rrdp_timeout, - }) - } - - /// Ignites the client. - /// - /// This _must_ be called before any other methods can be called. It must - /// be called after any potential fork on Unix systems because it spawns - /// threads. - pub fn ignite(&mut self) -> Result<(), Failed> { - let builder = match self.client.as_mut() { - Ok(_) => return Ok(()), - Err(builder) => match builder.take() { - Some(builder) => builder, - None => { - error!("Previously failed to initialize HTTP client."); - return Err(Failed) - } - } - }; - let client = match builder.build() { - Ok(client) => client, - Err(err) => { - error!("Failed to initialize HTTP client: {}.", err); - return Err(Failed) - } - }; - self.client = Ok(client); - Ok(()) - } - - /// Loads a WebPKI trusted certificate. - fn load_cert(path: &Path) -> Result { - let mut file = match fs::File::open(path) { - Ok(file) => file, - Err(err) => { - error!( - "Cannot open rrdp-root-cert file '{}': {}'", - path.display(), err - ); - return Err(Failed); - } - }; - let mut data = Vec::new(); - if let Err(err) = io::Read::read_to_end(&mut file, &mut data) { - error!( - "Cannot read rrdp-root-cert file '{}': {}'", - path.display(), err - ); - return Err(Failed); - } - Certificate::from_pem(&data).map_err(|err| { - error!( - "Cannot decode rrdp-root-cert file '{}': {}'", - path.display(), err - ); - Failed - }) - } - - /// Returns a reference to the reqwest client. - /// - /// # Panics - /// - /// The method panics if the client hasn’t been ignited yet. - fn client(&self) -> &Client { - self.client.as_ref().expect("HTTP client has not been ignited") - } - - /// Performs an HTTP GET request for the given URI. - /// - /// If keeping responses is enabled, the response is written to a file - /// corresponding to the URI. If the resource behind the URI changes over - /// time and this change should be tracked, set `multi` to `true` to - /// include the current time in the file name. - pub fn response( - &self, - uri: &uri::Https, - multi: bool, - ) -> Result { - self._response(uri, self.client().get(uri.as_str()), multi) - } - - /// Creates a response from a request builder. - fn _response( - &self, - uri: &uri::Https, - mut request: RequestBuilder, - multi: bool - ) -> Result { - if let Some(timeout) = self.timeout { - request = request.timeout(timeout); - } - request.send().and_then(|response| { - response.error_for_status() - }).map(|response| { - HttpResponse::create(response, uri, &self.response_dir, multi) - }) - } - - /// Requests, parses, and returns the given RRDP notification file. - /// - /// The value referred to by `status` will be updated to the received - /// status code or `HttpStatus::Error` if the request failed. - /// - /// Returns the notification file on success. 
- pub fn notification_file( - &self, - uri: &uri::Https, - state: Option<&RepositoryState>, - status: &mut HttpStatus, - ) -> Result, Failed> { - let mut request = self.client().get(uri.as_str()); - if let Some(state) = state { - if let Some(etag) = state.etag.as_ref() { - request = request.header( - header::IF_NONE_MATCH, etag.as_ref() - ); - } - if let Some(ts) = state.last_modified_ts { - if let Some(datetime) = Utc.timestamp_opt(ts, 0).single() { - request = request.header( - header::IF_MODIFIED_SINCE, - format_http_date(datetime) - ); - } - } - } - let response = match self._response(uri, request, true) { - Ok(response) => { - *status = response.status().into(); - response - } - Err(err) => { - warn!("RRDP {}: {}", uri, err); - *status = HttpStatus::Error; - return Err(Failed) - } - }; - - if response.status() == StatusCode::NOT_MODIFIED { - Ok(None) - } - else if response.status() != StatusCode::OK { - warn!( - "RRDP {}: Getting notification file failed with status {}", - uri, response.status() - ); - Err(Failed) - } - else { - Notification::from_response(uri, response).map(Some) - } - } -} - - -//------------ HttpResponse -------------------------------------------------- - -/// Wraps a reqwest response for added features. -struct HttpResponse { - /// The wrapped reqwest response. - response: Response, - - /// A file to also store read data into. - file: Option, -} - -impl HttpResponse { - /// Creates a new response wrapping a reqwest reponse. - /// - /// If `response_dir` is some path, the response will also be written to - /// a file under this directory based on `uri`. Each URI component - /// starting with the authority will be a directory name. If `multi` is - /// `false` the last component will be the file name. If `multi` is - /// `true` the last component will be a directory, too, and the file name - /// will be the ISO timestamp of the current time. - pub fn create( - response: Response, - uri: &uri::Https, - response_dir: &Option, - multi: bool - ) -> Self { - HttpResponse { - response, - file: response_dir.as_ref().and_then(|base| { - Self::open_file(base, uri, multi) - }) - } - } - - /// Opens the file mirroring file. - /// - /// See [`create`][Self::create] for the rules. - fn open_file( - base: &Path, uri: &uri::Https, multi: bool - ) -> Option { - let path = Self::get_mirror_path(base, uri, multi)?; - let parent = match path.parent() { - Some(parent) => parent, - None => { - warn!( - "Cannot keep HTTP response; \ - URI translated into a bad path '{}'", - path.display() - ); - return None - } - }; - if let Err(err) = fs::create_dir_all(parent) { - warn!( - "Cannot keep HTTP response; \ - creating directory {} failed: {}", - parent.display(), err - ); - return None - } - fs::File::create(&path).map_err(|err| { - warn!( - "Cannot keep HTTP response; \ - creating file {} failed: {}", - path.display(), err - ); - Failed - }).ok() - } - - /// Returns a mirror path for given HTTPS URI. - /// - /// Returns `None` if the path is fishy in any way. Specifically, we only - /// allow printable ASCII characters and no path segments . and .. - fn get_mirror_path( - base: &Path, uri: &uri::Https, multi: bool - ) -> Option { - let rel = &uri.as_str()[8..]; - for segment in rel.split('/') { - if segment == "." || segment == ".." 
-            {
-                warn!(
-                    "Cannot keep HTTP response; \
-                     URI '{}' translates into an ambiguous path.",
-                    uri
-                );
-                return None
-            }
-        }
-        let path = base.join(rel);
-        if !path.starts_with(base) {
-            warn!(
-                "Cannot keep HTTP response; \
-                 URI '{}' translates into an illegal path.",
-                uri
-            );
-            return None
-        }
-        if multi {
-            Some(path.join(Utc::now().to_rfc3339()))
-        }
-        else {
-            Some(path)
-        }
-    }
-
-    /// Returns the value of the content length header if present.
-    pub fn content_length(&self) -> Option<u64> {
-        self.response.content_length()
-    }
-
-    /// Copies the full content of the response to the given writer.
-    pub fn copy_to<W: io::Write>(
-        &mut self, w: &mut W
-    ) -> Result<u64, io::Error> {
-        // We cannot use the reqwest response’s `copy_to` impl because we need
-        // to use our own `io::Read` impl which sneaks in the copying to file
-        // if necessary.
-        io::copy(self, w)
-    }
-
-    /// Returns the status code of the response.
-    pub fn status(&self) -> StatusCode {
-        self.response.status()
-    }
-
-    /// Returns the value of the ETag header if present.
-    ///
-    /// The returned value is the complete content. That is, it includes the
-    /// quotation marks and a possible `W/` prefix.
-    ///
-    /// The method quietly returns `None` if the content of the header is
-    /// malformed or if there is more than one occurrence of the header.
-    ///
-    /// The method returns a `Bytes` value as there is a good chance the
-    /// tag is short enough to be inlined.
-    pub fn etag(&self) -> Option<Bytes> {
-        let mut etags = self.response.headers()
-            .get_all(header::ETAG)
-            .into_iter();
-        let etag = etags.next()?;
-        if etags.next().is_some() {
-            return None
-        }
-        Self::parse_etag(etag.as_bytes())
-    }
-
-    /// Parses the ETag value.
-    ///
-    /// This is a separate function to make testing easier.
-    fn parse_etag(etag: &[u8]) -> Option<Bytes> {
-        // The tag starts with an optional case-sensitive `W/` followed by
-        // `"`. Let’s remember where the actual tag starts.
-        let start = if etag.starts_with(b"W/\"") {
-            3
-        }
-        else if etag.first() == Some(&b'"') {
-            1
-        }
-        else {
-            return None
-        };
-
-        // We need at least one more character. Empty tags are allowed.
-        if etag.len() <= start {
-            return None
-        }
-
-        // The tag ends with a `"`.
-        if etag.last() != Some(&b'"') {
-            return None
-        }
-
-        Some(Bytes::copy_from_slice(etag))
-    }
-
-    /// Returns the value of the Last-Modified header if present.
-    ///
-    /// The method quietly returns `None` if the content of the header is
-    /// malformed or if there is more than one occurrence of the header.
-    pub fn last_modified(&self) -> Option<DateTime<Utc>> {
-        let mut iter = self.response.headers()
-            .get_all(header::LAST_MODIFIED)
-            .into_iter();
-        let value = iter.next()?;
-        if iter.next().is_some() {
-            return None
-        }
-        parse_http_date(value.to_str().ok()?)
-    }
-}
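The acceptance rules of `parse_etag` are compact enough to check in isolation. The following standalone sketch (not part of the patch) mirrors the same checks:

```rust
// Mirrors parse_etag's acceptance rules, for illustration only.
fn is_valid_etag(etag: &[u8]) -> bool {
    // Optional case-sensitive `W/` prefix, then an opening quote.
    let start = if etag.starts_with(b"W/\"") { 3 }
        else if etag.first() == Some(&b'"') { 1 }
        else { return false };
    // At least the closing quote must follow; empty tags are fine.
    etag.len() > start && etag.last() == Some(&b'"')
}

fn main() {
    assert!(is_valid_etag(b"\"xyzzy\""));    // strong tag
    assert!(is_valid_etag(b"W/\"xyzzy\""));  // weak tag
    assert!(is_valid_etag(b"\"\""));         // empty tag is allowed
    assert!(!is_valid_etag(b"xyzzy"));       // quotes are required
    assert!(!is_valid_etag(b"w/\"xyzzy\"")); // prefix is case-sensitive
}
```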
-
-
-//--- Read
-
-impl io::Read for HttpResponse {
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
-        let res = self.response.read(buf)?;
-        if let Some(file) = self.file.as_mut() {
-            file.write_all(&buf[..res])?;
-        }
-        Ok(res)
-    }
-}
-
-
-//------------ Notification --------------------------------------------------
-
-/// The notification file of an RRDP repository.
-struct Notification {
-    /// The content of the file.
-    content: NotificationFile,
-
-    /// The Etag value if provided.
-    etag: Option<Bytes>,
-
-    /// The Last-Modified value if provided.
-    last_modified: Option<DateTime<Utc>>,
-}
-
-impl Notification {
-    /// Creates a new notification from a successful HTTP response.
-    ///
-    /// Assumes that the response status was 200 OK.
-    fn from_response(
-        uri: &uri::Https, response: HttpResponse
-    ) -> Result<Self, Failed> {
-        let etag = response.etag();
-        let last_modified = response.last_modified();
-        let mut content = NotificationFile::parse(
-            io::BufReader::new(response)
-        ).map_err(|err| {
-            warn!("RRDP {}: {}", uri, err);
-            Failed
-        })?;
-        content.sort_deltas();
-        Ok(Notification { content, etag, last_modified })
-    }
-}
-
-
-//------------ RepositoryState -----------------------------------------------
-
-/// The current state of an RRDP repository.
-#[derive(Clone, Debug, Eq, Hash, PartialEq)]
-struct RepositoryState {
-    /// The rpkiNotify URI of the repository.
-    pub rpki_notify: uri::Https,
-
-    /// The UUID of the current session of the repository.
-    pub session: Uuid,
-
-    /// The serial number within the current session.
-    pub serial: u64,
-
-    /// Unix timestamp in seconds of the time of last update of the server.
-    ///
-    /// We are not using `DateTime<Utc>` here since we don’t need sub-second
-    /// precision and converting on the fly makes a value change when cycled
-    /// through the database as its sub-second portion is forced to zero.
-    pub updated_ts: i64,
-
-    /// The time when we consider the stored data to be expired.
-    pub best_before_ts: i64,
-
-    /// The value of the date header of the notification file if present.
-    ///
-    /// Given as the Unix timestamp in seconds.
-    pub last_modified_ts: Option<i64>,
-
-    /// The value of the ETag header of the notification file if present.
-    ///
-    /// This is the complete tag including the quotation marks and possibly
-    /// the weak prefix.
-    pub etag: Option<Bytes>,
-}
-
-impl RepositoryState {
-    /// Create the state for a delta update.
-    pub fn new_for_delta(
-        rpki_notify: uri::Https,
-        session: Uuid,
-        serial: u64,
-        fallback: FallbackTime,
-    ) -> Self {
-        RepositoryState {
-            rpki_notify,
-            session,
-            serial,
-            updated_ts: Utc::now().timestamp(),
-            best_before_ts: fallback.best_before().timestamp(),
-            last_modified_ts: None,
-            etag: None
-        }
-    }
-
-    /// Create the state based on the notification file.
-    pub fn from_notify(
-        rpki_notify: uri::Https,
-        notify: &Notification,
-        fallback: FallbackTime,
-    ) -> Self {
-        RepositoryState {
-            rpki_notify,
-            session: notify.content.session_id(),
-            serial: notify.content.serial(),
-            updated_ts: Utc::now().timestamp(),
-            best_before_ts: fallback.best_before().timestamp(),
-            last_modified_ts: notify.last_modified.map(|x| x.timestamp()),
-            etag: notify.etag.clone(),
-        }
-    }
-
-    /// Returns the last update time as a proper timestamp.
-    ///
-    /// Returns `None` if the time cannot be converted into a timestamp for
-    /// some reason.
-    pub fn updated(&self) -> Option<DateTime<Utc>> {
-        Utc.timestamp_opt(self.updated_ts, 0).single()
-    }
-
-    /// Returns the best before time as a proper timestamp.
-    ///
-    /// Returns `None` if the time cannot be converted into a timestamp for
-    /// some reason.
-    pub fn best_before(&self) -> Option<DateTime<Utc>> {
-        Utc.timestamp_opt(self.best_before_ts, 0).single()
-    }
-
-    /// Sets the update time to now.
-    pub fn touch(&mut self, fallback: FallbackTime) {
-        self.updated_ts = Utc::now().timestamp();
-        self.best_before_ts = fallback.best_before().timestamp();
-    }
-
-    /// Returns whether this repository should be considered expired.
-    ///
-    /// If in doubt, this will return `true`.
-    pub fn is_expired(&self) -> bool {
-        match self.best_before() {
-            Some(best_before) => Utc::now() > best_before,
-            None => true,
-        }
-    }
-
-    /// Reads the state file of a repository.
-    pub fn load(repo: &Repository) -> Result<Option<Self>, Failed> {
-        Self::load_path(&Self::file_path(repo))
-    }
-
-    /// Reads the state file at a path.
-    pub fn load_path(path: &Path) -> Result<Option<Self>, Failed> {
-        let mut file = match File::open(path) {
-            Ok(file) => file,
-            Err(err) if err.kind() == io::ErrorKind::NotFound => {
-                return Ok(None)
-            }
-            Err(err) => {
-                warn!(
-                    "Failed to open repository state file {}: {}",
-                    path.display(), err
-                );
-                return Err(Failed)
-            }
-        };
-        Self::_read(&mut file)
-            .map(Some)
-            .map_err(|err| {
-                warn!(
-                    "Failed to read repository state file {}: {}",
-                    path.display(), err
-                );
-                Failed
-            })
-    }
-
-    /// Deletes the state file of a repository.
-    pub fn remove(repo: &Repository) -> Result<(), Failed> {
-        fatal::remove_file(&Self::file_path(repo))
-    }
-
-
-    /// Reads the state from an IO reader.
-    fn _read(reader: &mut impl io::Read) -> Result<Self, io::Error> {
-        // Version number. Must be 0u8.
-        let version = u8::parse(reader)?;
-        if version != 0 {
-            return Err(io::Error::new(
-                io::ErrorKind::Other,
-                format!("unexpected version {}", version)
-            ))
-        }
-
-        Ok(RepositoryState {
-            rpki_notify: Parse::parse(reader)?,
-            session: Parse::parse(reader)?,
-            serial: Parse::parse(reader)?,
-            updated_ts: Parse::parse(reader)?,
-            best_before_ts: Parse::parse(reader)?,
-            last_modified_ts: Parse::parse(reader)?,
-            etag: Parse::parse(reader)?,
-        })
-    }
-
-    /// Writes the state file of a repository.
-    pub fn write(&self, repo: &Repository) -> Result<(), Failed> {
-        let path = Self::file_path(repo);
-        let mut file = match File::create(&path) {
-            Ok(file) => file,
-            Err(err) => {
-                error!(
-                    "Fatal: Failed to open repository state file {}: {}",
-                    path.display(), err
-                );
-                return Err(Failed)
-            }
-        };
-        self._write(&mut file).map_err(|err| {
-            error!(
-                "Fatal: Failed to write repository state file {}: {}",
-                path.display(), err
-            );
-            Failed
-        })
-    }
-
-    /// Writes the state to an IO writer.
-    fn _write(
-        &self, writer: &mut impl io::Write
-    ) -> Result<(), io::Error> {
-        0u8.compose(writer)?; // version
-        self.rpki_notify.compose(writer)?;
-        self.session.compose(writer)?;
-        self.serial.compose(writer)?;
-        self.updated_ts.compose(writer)?;
-        self.best_before_ts.compose(writer)?;
-        self.last_modified_ts.compose(writer)?;
-        self.etag.compose(writer)?;
-        Ok(())
-    }
-
-    pub const FILE_NAME: &'static str = "state.bin";
-
-    pub fn file_path(repo: &Repository) -> PathBuf {
-        repo.path.join(Self::FILE_NAME)
-    }
-}
-
-
-//------------ RepositoryObject ----------------------------------------------
-
-/// A repository object stored locally.
-///
-/// In order to speed up updates, we store the RRDP hash of a file before its
-/// content, if we understand it.
-#[derive(Clone, Debug)]
-struct RepositoryObject {
-    /// The RRDP hash of the object.
-    #[allow(dead_code)]
-    hash: rrdp::Hash,
-
-    /// The content of the object.
-    content: Bytes,
-}
-
-impl RepositoryObject {
-    /// Loads a repository object from the given path.
-    pub fn load(path: &Path) -> Result<Option<Self>, Failed> {
-        let mut file = match Self::open(path)? {
-            Some(file) => file,
-            None => return Ok(None)
-        };
-        Self::read(&mut file).map(Some).map_err(|err| {
-            error!("Fatal: failed to read {}: {}", path.display(), err);
-            Failed
-        })
-    }
-
-    /// Checks the hash of the objects.
-    pub fn load_hash(path: &Path) -> Result<Option<rrdp::Hash>, Failed> {
-        let mut file = match Self::open(path)?
-        {
-            Some(file) => file,
-            None => return Ok(None)
-        };
-        rrdp::Hash::parse(&mut file).map(Some).map_err(|err| {
-            error!("Fatal: failed to read {}: {}", path.display(), err);
-            Failed
-        })
-    }
-
-    /// Opens the file for a repository object.
-    fn open(path: &Path) -> Result<Option<File>, Failed> {
-        match File::open(path) {
-            Ok(file) => Ok(Some(file)),
-            Err(err) if err.kind() == io::ErrorKind::NotFound => {
-                Ok(None)
-            }
-            Err(err) => {
-                error!("Fatal: failed to open {}: {}", path.display(), err);
-                Err(Failed)
-            }
-        }
-    }
-
-    /// Reads the object from a reader.
-    fn read(source: &mut impl io::Read) -> Result<Self, io::Error> {
-        let hash = rrdp::Hash::parse(source)?;
-        let mut content = Vec::new();
-        source.read_to_end(&mut content)?;
-        Ok(RepositoryObject {
-            hash,
-            content: content.into(),
-        })
-    }
-
-    /// Writes a new object using everything from reader.
-    ///
-    /// This function returns an `io::Error` since the caller needs to be
-    /// able to suppress any error resulting from reading from `data` as
-    /// these are not in fact fatal. Any errors occurring while trying to
-    /// open the file and actually writing to it are still fatal. The
-    /// distinction is made by the caller by keeping track of what `data`
-    /// does.
-    pub fn create(
-        path: &Path, data: &mut impl io::Read
-    ) -> Result<(), io::Error> {
-        if let Some(parent) = path.parent() {
-            if let Err(err) = fs::create_dir_all(parent) {
-                return Err(io::Error::new(
-                    io::ErrorKind::Other,
-                    format!(
-                        "Fatal: failed to create directory {}: {}.",
-                        parent.display(), err
-                    )
-                ))
-            }
-        }
-        let mut target = match File::create(path) {
-            Ok(target) => target,
-            Err(err) => {
-                return Err(io::Error::new(
-                    io::ErrorKind::Other,
-                    format!(
-                        "Fatal: failed to open file {}: {}",
-                        path.display(), err
-                    )
-                ))
-            }
-        };
-        Self::_create(data, &mut target).map_err(|err| {
-            io::Error::new(
-                io::ErrorKind::Other,
-                format!(
-                    "Fatal: failed to write file {}: {}", path.display(), err
-                )
-            )
-        })
-    }
-
-    fn _create(
-        data: &mut impl io::Read, target: &mut File
-    ) -> Result<(), io::Error> {
-        rrdp::Hash::from([0u8; 32]).compose(target)?;
-        let mut reader = HashRead::new(data);
-        io::copy(&mut reader, target)?;
-        target.seek(SeekFrom::Start(0))?;
-        reader.into_hash().compose(target)?;
-        Ok(())
-    }
-
-    /// Dumps an object to the given target path.
-    ///
-    /// The dumped object will only contain the real object data, not the
-    /// added hash.
-    pub fn dump(
-        source_path: &Path,
-        target_path: &Path,
-    ) -> Result<(), Failed> {
-        let mut source = match File::open(source_path) {
-            Ok(source) => source,
-            Err(err) => {
-                error!(
-                    "Failed to open source RRDP file {}: {}",
-                    source_path.display(), err
-                );
-                return Err(Failed)
-            }
-        };
-        let mut target = match File::create(target_path) {
-            Ok(target) => target,
-            Err(err) => {
-                error!(
-                    "Failed to create target RRDP file {}: {}",
-                    target_path.display(), err
-                );
-                return Err(Failed)
-            }
-        };
-        if let Err(err) = rrdp::Hash::parse(&mut source) {
-            error!(
-                "Failed to read source RRDP file {}: {}",
-                source_path.display(), err
-            );
-            return Err(Failed)
-        };
-        if let Err(err) = io::copy(&mut source, &mut target) {
-            error!(
-                "Failed to copy source RRDP file {}: {}",
-                source_path.display(), err
-            );
-            return Err(Failed)
-        }
-        Ok(())
-    }
-}
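`_create` above uses a seek-back trick: it reserves space for the hash, streams the data through a hashing reader, then rewinds and fills in the real digest. A standalone sketch of the same pattern (not part of the patch; it assumes the `ring` crate, which the surrounding code already uses for digests):

```rust
use std::io::{self, Read, Seek, SeekFrom, Write};

// Reserve a 32-byte hash slot, stream and hash the content, then seek
// back and overwrite the placeholder with the real SHA-256 digest.
fn write_hashed(data: &mut impl Read, target: &mut std::fs::File) -> io::Result<()> {
    let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
    target.write_all(&[0u8; 32])?; // placeholder for the hash
    let mut buf = [0u8; 8192];
    loop {
        let n = data.read(&mut buf)?;
        if n == 0 { break }
        ctx.update(&buf[..n]);
        target.write_all(&buf[..n])?; // content follows the hash
    }
    target.seek(SeekFrom::Start(0))?;
    target.write_all(ctx.finish().as_ref()) // fill in the digest
}
```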
-
-
-//------------ FallbackTime --------------------------------------------------
-
-/// Parameters for calculating the best-before time of repositories.
-#[derive(Clone, Copy, Debug)]
-struct FallbackTime {
-    min: Duration,
-    max: Duration,
-}
-
-impl FallbackTime {
-    /// Creates a new value from the configuration.
-    pub fn from_config(config: &Config) -> Self {
-        FallbackTime {
-            min: config.refresh,
-            max: cmp::max(2 * config.refresh, config.rrdp_fallback_time)
-        }
-    }
-
-    /// Picks a best-before date for a repository updated around now.
-    pub fn best_before(self) -> DateTime<Utc> {
-        // Saturating conversion between std’s and chrono’s Duration types.
-        Utc::now() + chrono::Duration::from_std(
-            rand::thread_rng().gen_range(self.min..self.max)
-        ).unwrap_or_else(|_| chrono::Duration::milliseconds(i64::MAX))
-    }
-}
-
-
-//------------ HashRead ------------------------------------------------------
-
-/// A reader wrapper that calculates the SHA-256 hash of all read data.
-struct HashRead<R> {
-    /// The wrapped reader.
-    reader: R,
-
-    /// The context for hash calculation.
-    context: digest::Context,
-}
-
-impl<R> HashRead<R> {
-    /// Creates a new hash reader.
-    pub fn new(reader: R) -> Self {
-        HashRead {
-            reader,
-            context: digest::Context::new(&digest::SHA256)
-        }
-    }
-
-    /// Converts the reader into the hash.
-    pub fn into_hash(self) -> rrdp::Hash {
-        // Unwrap should be safe: This can only fail if the slice has the
-        // wrong length.
-        rrdp::Hash::try_from(self.context.finish()).unwrap()
-    }
-}
-
-
-impl<R: io::Read> io::Read for HashRead<R> {
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
-        let res = self.reader.read(buf)?;
-        self.context.update(&buf[..res]);
-        Ok(res)
-    }
-}
-
-
-//------------ RrdpDataRead --------------------------------------------------
-
-/// A reader that reads the data of objects in a snapshot or delta.
-///
-/// The type ensures the size limit of objects and allows treating read errors
-/// differently than write errors by storing any error and making it available
-/// after the fact.
-struct RrdpDataRead<'a, R> {
-    /// The wrapped reader.
-    reader: R,
-
-    /// The URI of the object we are reading.
-    uri: &'a uri::Rsync,
-
-    /// The number of bytes left to read.
-    ///
-    /// If this is `None` we are allowed to read an unlimited amount.
-    left: Option<u64>,
-
-    /// The last error that happened.
-    err: Option<RrdpDataReadError>,
-}
-
-impl<'a, R> RrdpDataRead<'a, R> {
-    /// Creates a new read from necessary information.
-    ///
-    /// The returned value will wrap `reader`. The `uri` should be the rsync
-    /// URI of the published object. It is only used for generating meaningful
-    /// error messages. If `max_size` is some value, the size of the object
-    /// will be limited to that value in bytes. Larger objects lead to an
-    /// error.
-    pub fn new(reader: R, uri: &'a uri::Rsync, max_size: Option<u64>) -> Self {
-        RrdpDataRead { reader, uri, left: max_size, err: None }
-    }
-
-    /// Returns a stored error if available.
-    ///
-    /// If it returns some error, that error happened during reading before
-    /// an `io::Error` was returned.
-    ///
-    /// The method takes the stored error and replaces it internally with
-    /// `None`.
-    pub fn take_err(&mut self) -> Option<RrdpDataReadError> {
-        self.err.take()
-    }
-}
-
-impl<'a, R: io::Read> io::Read for RrdpDataRead<'a, R> {
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
-        let res = match self.reader.read(buf) {
-            Ok(res) => res,
-            Err(err) => {
-                self.err = Some(RrdpDataReadError::Read(err));
-                return Err(io::Error::new(
-                    io::ErrorKind::Other, "reading data failed",
-                ))
-            }
-        };
-        if let Some(left) = self.left {
-            let res64 = match u64::try_from(res) {
-                Ok(res) => res,
-                Err(_) => {
-                    // If the usize doesn’t fit into a u64, things are
-                    // definitely way too big.
-                    self.left = Some(0);
-                    self.err = Some(
-                        RrdpDataReadError::LargeObject(self.uri.clone())
-                    );
-                    return Err(io::Error::new(
-                        io::ErrorKind::Other, "size limit exceeded"
-                    ))
-                }
-            };
-            if res64 > left {
-                self.left = Some(0);
-                self.err = Some(
-                    RrdpDataReadError::LargeObject(self.uri.clone())
-                );
-                Err(io::Error::new(
-                    io::ErrorKind::Other, "size limit exceeded")
-                )
-            }
-            else {
-                self.left = Some(left - res64);
-                Ok(res)
-            }
-        }
-        else {
-            Ok(res)
-        }
-    }
-}
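A caller-side sketch of how the stored error is meant to be consumed (hypothetical helper, not part of the patch): copy through the limiter and, on failure, prefer the limiter's own error over the generic `io::Error` it surfaced.

```rust
use std::io;

// If the copy fails, take_err() tells us whether the limiter tripped
// (oversized object or transport error); if it stayed empty, the error
// came from the writer side and is folded into the Read variant here.
fn copy_limited<R: io::Read, W: io::Write>(
    mut read: RrdpDataRead<'_, R>, target: &mut W
) -> Result<u64, RrdpDataReadError> {
    io::copy(&mut read, target).map_err(|io_err| {
        read.take_err().unwrap_or(RrdpDataReadError::Read(io_err))
    })
}
```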
-
-
-//------------ SnapshotReason ------------------------------------------------
-
-/// The reason why a snapshot was used.
-#[derive(Clone, Copy, Debug)]
-pub enum SnapshotReason {
-    /// The repository is new.
-    NewRepository,
-
-    /// A new session was encountered.
-    NewSession,
-
-    /// The delta set in the notification file is inconsistent.
-    BadDeltaSet,
-
-    /// A larger-than-supported serial number was encountered.
-    LargeSerial,
-
-    /// The local copy is outdated and cannot be updated via deltas.
-    OutdatedLocal,
-
-    /// A delta file was conflicting with locally stored data.
-    ConflictingDelta,
-
-    /// There were too many deltas to process.
-    TooManyDeltas,
-}
-
-impl SnapshotReason {
-    /// Returns a shorthand code for the reason.
-    pub fn code(self) -> &'static str {
-        use SnapshotReason::*;
-
-        match self {
-            NewRepository => "new-repository",
-            NewSession => "new-session",
-            BadDeltaSet => "inconsistent-delta-set",
-            LargeSerial => "large-serial",
-            OutdatedLocal => "outdate-local",
-            ConflictingDelta => "conflicting-delta",
-            TooManyDeltas => "too-many-deltas",
-        }
-    }
-}
-
-
-//------------ HttpStatus ----------------------------------------------------
-
-/// The result of an HTTP request.
-#[derive(Clone, Copy, Debug)]
-pub enum HttpStatus {
-    /// A response was received with the given status code.
-    Response(StatusCode),
-
-    /// An error happened.
-    Error
-}
-
-impl HttpStatus {
-    pub fn into_i16(self) -> i16 {
-        match self {
-            HttpStatus::Response(code) => code.as_u16() as i16,
-            HttpStatus::Error => -1
-        }
-    }
-
-    pub fn is_not_modified(self) -> bool {
-        matches!(
-            self,
-            HttpStatus::Response(code) if code == StatusCode::NOT_MODIFIED
-        )
-    }
-
-    pub fn is_success(self) -> bool {
-        matches!(
-            self,
-            HttpStatus::Response(code) if code.is_success()
-        )
-    }
-}
-
-impl From<StatusCode> for HttpStatus {
-    fn from(code: StatusCode) -> Self {
-        HttpStatus::Response(code)
-    }
-}
-
-
-//============ Errors ========================================================
-
-//------------ RrdpDataReadError ---------------------------------------------
-
-/// An error happened while reading object data.
-///
-/// This covers both the case where the maximum allowed file size was
-/// exhausted as well as where reading data failed. Neither of them is fatal,
-/// so we need to process them separately.
-#[derive(Debug)]
-enum RrdpDataReadError {
-    LargeObject(uri::Rsync),
-    Read(io::Error),
-}
-
-
-//------------ SnapshotError -------------------------------------------------
-
-/// An error happened during snapshot processing.
-///
-/// This is an internal error type only necessary for error handling during
-/// RRDP processing. Values will be logged and converted into failures or
-/// negative results as necessary.
-#[derive(Debug)]
-enum SnapshotError {
-    Http(reqwest::Error),
-    HttpStatus(StatusCode),
-    Rrdp(rrdp::ProcessError),
-    SessionMismatch {
-        expected: Uuid,
-        received: Uuid
-    },
-    SerialMismatch {
-        expected: u64,
-        received: u64,
-    },
-    HashMismatch,
-    LargeObject(uri::Rsync),
-    Fatal,
-}
-
-impl From<reqwest::Error> for SnapshotError {
-    fn from(err: reqwest::Error) -> Self {
-        SnapshotError::Http(err)
-    }
-}
-
-impl From<StatusCode> for SnapshotError {
-    fn from(code: StatusCode) -> Self {
-        SnapshotError::HttpStatus(code)
-    }
-}
-
-impl From<rrdp::ProcessError> for SnapshotError {
-    fn from(err: rrdp::ProcessError) -> Self {
-        SnapshotError::Rrdp(err)
-    }
-}
-
-impl From<io::Error> for SnapshotError {
-    fn from(err: io::Error) -> Self {
-        SnapshotError::Rrdp(err.into())
-    }
-}
-
-impl From<RrdpDataReadError> for SnapshotError {
-    fn from(err: RrdpDataReadError) -> Self {
-        match err {
-            RrdpDataReadError::LargeObject(uri) => {
-                SnapshotError::LargeObject(uri)
-            }
-            RrdpDataReadError::Read(err) => {
-                SnapshotError::Rrdp(err.into())
-            }
-        }
-    }
-}
-
-impl From<Failed> for SnapshotError {
-    fn from(_: Failed) -> Self {
-        SnapshotError::Fatal
-    }
-}
-
-impl fmt::Display for SnapshotError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            SnapshotError::Http(ref err) => err.fmt(f),
-            SnapshotError::HttpStatus(status) => {
-                write!(f, "HTTP {}", status)
-            }
-            SnapshotError::Rrdp(ref err) => err.fmt(f),
-            SnapshotError::SessionMismatch { ref expected, ref received } => {
-                write!(
-                    f,
-                    "session ID mismatch (notification_file: {}, \
-                     snapshot file: {}",
-                    expected, received
-                )
-            }
-            SnapshotError::SerialMismatch { ref expected, ref received } => {
-                write!(
-                    f,
-                    "serial number mismatch (notification_file: {}, \
-                     snapshot file: {}",
-                    expected, received
-                )
-            }
-            SnapshotError::HashMismatch => {
-                write!(f, "hash value mismatch")
-            }
-            SnapshotError::LargeObject(ref uri) => {
-                write!(f, "object exceeds size limit: {}", uri)
-            }
-            SnapshotError::Fatal => Ok(())
-        }
-    }
-}
-
-impl error::Error for SnapshotError { }
-
-
-//------------ DeltaError ----------------------------------------------------
-
-/// An error happened during delta processing.
-///
-/// This is an internal error type only necessary for error handling during
-/// RRDP processing. Values will be logged and converted into failures or
-/// negative results as necessary.
-#[derive(Debug)]
-enum DeltaError {
-    Http(reqwest::Error),
-    HttpStatus(StatusCode),
-    Rrdp(rrdp::ProcessError),
-    SessionMismatch {
-        expected: Uuid,
-        received: Uuid
-    },
-    SerialMismatch {
-        expected: u64,
-        received: u64,
-    },
-    MissingObject {
-        uri: uri::Rsync,
-    },
-    ObjectAlreadyPresent {
-        uri: uri::Rsync,
-    },
-    ObjectHashMismatch {
-        uri: uri::Rsync,
-    },
-    ObjectRepeated {
-        uri: uri::Rsync,
-    },
-    DeltaHashMismatch,
-    LargeObject(uri::Rsync),
-    Fatal,
-}
-
-impl From<reqwest::Error> for DeltaError {
-    fn from(err: reqwest::Error) -> Self {
-        DeltaError::Http(err)
-    }
-}
-
-impl From<StatusCode> for DeltaError {
-    fn from(code: StatusCode) -> Self {
-        DeltaError::HttpStatus(code)
-    }
-}
-
-impl From<rrdp::ProcessError> for DeltaError {
-    fn from(err: rrdp::ProcessError) -> Self {
-        DeltaError::Rrdp(err)
-    }
-}
-
-impl From<io::Error> for DeltaError {
-    fn from(err: io::Error) -> Self {
-        DeltaError::Rrdp(err.into())
-    }
-}
-
-impl From<RrdpDataReadError> for DeltaError {
-    fn from(err: RrdpDataReadError) -> Self {
-        match err {
-            RrdpDataReadError::LargeObject(uri) => {
-                DeltaError::LargeObject(uri)
-            }
-            RrdpDataReadError::Read(err) => {
-                DeltaError::Rrdp(err.into())
-            }
-        }
-    }
-}
-
-impl From<Failed> for DeltaError {
-    fn from(_: Failed) -> Self {
-        DeltaError::Fatal
-    }
-}
-
-impl fmt::Display for DeltaError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            DeltaError::Http(ref err) => err.fmt(f),
-            DeltaError::HttpStatus(status) => {
-                write!(f, "HTTP {}", status)
-            }
-            DeltaError::Rrdp(ref err) => err.fmt(f),
-            DeltaError::SessionMismatch { ref expected, ref received } => {
-                write!(
-                    f,
-                    "session ID mismatch (notification_file: {}, \
-                     snapshot file: {}",
-                    expected, received
-                )
-            }
-            DeltaError::SerialMismatch { ref expected, ref received } => {
-                write!(
-                    f,
-                    "serial number mismatch (notification_file: {}, \
-                     snapshot file: {}",
-                    expected, received
-                )
-            }
-            DeltaError::MissingObject { ref uri } => {
-                write!(
-                    f,
-                    "reference to missing object {}",
-                    uri
-                )
-            }
-            DeltaError::ObjectAlreadyPresent { ref uri } => {
-                write!(
-                    f,
-                    "attempt to add already present object {}",
-                    uri
-                )
-            }
-            DeltaError::ObjectHashMismatch { ref uri } => {
-                write!(
-                    f,
-                    "local object {} has different hash",
-                    uri
-                )
-            }
-            DeltaError::ObjectRepeated { ref uri } => {
-                write!(f, "object appears multiple times: {}", uri)
-            }
-            DeltaError::LargeObject(ref uri) => {
-                write!(f, "object exceeds size limit: {}", uri)
-            }
-            DeltaError::DeltaHashMismatch => {
-                write!(f, "delta file hash value mismatch")
-            }
-            DeltaError::Fatal => {
-                Ok(())
-            }
-        }
-    }
-}
-
-impl error::Error for DeltaError { }
-
-
-//============ Testing =======================================================
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    use std::str::FromStr;
-
-    #[test]
-    fn write_read_repository_state() {
-        let orig = RepositoryState {
-            rpki_notify: uri::Https::from_str(
-                "https://foo.bar/bla/blubb"
-            ).unwrap(),
-            session: Uuid::nil(),
-            serial: 12,
-            updated_ts: 28,
-            best_before_ts: 892,
-            last_modified_ts: Some(23),
-            etag: Some(Bytes::copy_from_slice(b"23890"))
-        };
-        let mut written = Vec::new();
-        orig._write(&mut written).unwrap();
-        let mut slice = written.as_slice();
-        let decoded = RepositoryState::_read(&mut slice).unwrap();
-        assert!(slice.is_empty());
-        assert_eq!(orig, decoded);
-    }
-
-    #[test]
-    #[cfg(unix)]
-    fn get_mirror_path() {
-        fn check(uri: &[u8], path_str: Option<&str>) {
-            let path = uri::Https::from_slice(uri).ok().and_then(|uri| {
-                HttpResponse::get_mirror_path(
-                    Path::new("/base"), &uri, false
-                )
-            });
-            assert_eq!(
-                path.as_ref().map(|path| path.to_str().unwrap()),
-                path_str
-            );
-        }
-
-        check(b"https://foo.bar/baz", Some("/base/foo.bar/baz"));
-        check(b"https:///foo.bar/baz", None);
-        check(b"https://foo.bar/../baz", None);
-        check(b"https://foo.bar/foo/../baz", None);
-        check(b"https://foo.bar/foo/./baz", None);
-        check(b"https://foo.bar/foo/baz?query", None);
-        check(b"https://../foo/baz", None);
-    }
-}
-
diff --git a/src/collector/rrdp/archive.rs b/src/collector/rrdp/archive.rs
new file mode 100644
index 00000000..ae0bcbaa
--- /dev/null
+++ b/src/collector/rrdp/archive.rs
@@ -0,0 +1,519 @@
+use std::{cmp, io, fs};
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::Duration;
+use bytes::Bytes;
+use chrono::{DateTime, TimeZone, Utc};
+use log::{error, warn};
+use rand::Rng;
+use rpki::{rrdp, uri};
+use uuid::Uuid;
+use crate::config::Config;
+use crate::error::RunFailed;
+use crate::utils::archive;
+use crate::utils::archive::{
+    Archive, ArchiveError, FetchError, OpenError, PublishError
+};
+use crate::utils::binio::{Compose, Parse};
+
+
+//------------ RrdpArchive ---------------------------------------------------
+
+#[derive(Debug)]
+pub struct RrdpArchive {
+    /// The path where everything from this repository lives.
+    path: Arc<PathBuf>,
+
+    /// The archive for the repository.
+    archive: archive::Archive<RrdpObjectMeta>,
+}
+
+impl RrdpArchive {
+    pub fn create(
+        path: Arc<PathBuf>
+    ) -> Result<Self, RunFailed> {
+        let archive = Archive::create(path.as_ref()).map_err(|err| {
+            archive_err(err, path.as_ref())
+        })?;
+        Ok(Self { path, archive })
+    }
+
+    pub fn create_with_file(
+        file: fs::File,
+        path: Arc<PathBuf>,
+    ) -> Result<Self, RunFailed> {
+        let archive = Archive::create_with_file(file).map_err(|err| {
+            archive_err(err, path.as_ref())
+        })?;
+        Ok(Self { path, archive })
+    }
+
+    pub fn try_open(path: Arc<PathBuf>) -> Result<Option<Self>, RunFailed> {
+        let archive = match Archive::open(path.as_ref(), true) {
+            Ok(archive) => archive,
+            Err(OpenError::NotFound) => return Ok(None),
+            Err(OpenError::Archive(err)) => {
+                return Err(archive_err(err, path.as_ref()))
+            }
+        };
+        Ok(Some(Self { path, archive }))
+    }
+
+    pub fn open(path: Arc<PathBuf>) -> Result<Self, RunFailed> {
+        let archive = archive::Archive::open(
+            path.as_ref(), false
+        ).map_err(|err| match err {
+            OpenError::NotFound => {
+                warn!(
+                    "RRDP repository file {} not found.", path.display()
+                );
+                RunFailed::retry()
+            }
+            OpenError::Archive(err) => archive_err(err, path.as_ref())
+        })?;
+        Ok(Self { path, archive })
+    }
+
+    pub fn path(&self) -> &Arc<PathBuf> {
+        &self.path
+    }
+}
+
+impl RrdpArchive {
+    pub fn verify(path: &Path) -> Result<(), OpenError> {
+        let archive = archive::Archive::<RrdpObjectMeta>::open(path, false)?;
+        archive.verify()?;
+        Ok(())
+    }
+
+    /// Loads an object from the archive.
+    ///
+    /// The object is identified by its rsync URI. If the object doesn’t
+    /// exist, returns `None`.
+    pub fn load_object(
+        &self,
+        uri: &uri::Rsync
+    ) -> Result<Option<Bytes>, RunFailed> {
+        let res = self.archive.fetch_bytes(uri.as_ref());
+        match res {
+            Ok(res) => Ok(Some(res)),
+            Err(FetchError::NotFound) => Ok(None),
+            Err(FetchError::Archive(err)) => {
+                Err(archive_err(err, self.path.as_ref()))
+            }
+        }
+    }
+
+    /// Loads the repository state.
+    ///
+    /// Returns an error if the state is missing or broken.
+    pub fn load_state(&self) -> Result<RepositoryState, RunFailed> {
+        let data = match self.archive.fetch(b"state") {
+            Ok(data) => data,
+            Err(archive::FetchError::NotFound) => {
+                return Err(
+                    archive_err(ArchiveError::Corrupt, self.path.as_ref())
+                )
+            }
+            Err(archive::FetchError::Archive(err)) => {
+                return Err(archive_err(err, self.path.as_ref()))
+            }
+        };
+        let mut data = data.as_ref();
+        RepositoryState::parse(&mut data).map_err(|_| {
+            archive_err(ArchiveError::Corrupt, self.path.as_ref())
+        })
+    }
+
+    /// Iterates over all the objects in the repository.
+    pub fn objects(
+        &self
+    ) -> Result<
+        impl Iterator<Item = Result<(uri::Rsync, Bytes), RunFailed>> + '_,
+        RunFailed
+    > {
+        self.archive.objects().map(|iter| {
+            iter.filter_map(|item| {
+                let (name, _meta, data) = match item {
+                    Ok(some) => some,
+                    Err(ArchiveError::Corrupt) => {
+                        return Some(Err(RunFailed::retry()))
+                    }
+                    Err(ArchiveError::Io(_)) => {
+                        return Some(Err(RunFailed::fatal()))
+                    }
+                };
+                let name = uri::Rsync::from_bytes(
+                    name.into_owned().into()
+                ).ok()?;
+                Some(Ok((name, data.into_owned().into())))
+            })
+        }).map_err(|err| {
+            match err {
+                ArchiveError::Corrupt => RunFailed::retry(),
+                ArchiveError::Io(_) => RunFailed::fatal(),
+            }
+        })
+    }
+}
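Read access in a nutshell, using the API shown above (a sketch, not part of the patch; the path and URI are invented):

```rust
use std::{path::PathBuf, sync::Arc};

// Open an existing archive, fetch one object by rsync URI, then walk
// every stored object. Error handling is reduced to `?`.
fn show(path: Arc<PathBuf>) -> Result<(), RunFailed> {
    let archive = RrdpArchive::open(path)?;
    let uri: rpki::uri::Rsync =
        "rsync://example.net/module/cert.cer".parse().expect("valid URI");
    if let Some(bytes) = archive.load_object(&uri)? {
        println!("{}: {} bytes", uri, bytes.len());
    }
    for item in archive.objects()? {
        let (uri, data) = item?;
        println!("{}: {} bytes", uri, data.len());
    }
    Ok(())
}
```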
+
+impl RrdpArchive {
+    /// Publishes a new object to the archive.
+    pub fn publish_object(
+        &mut self,
+        uri: &uri::Rsync,
+        content: &[u8]
+    ) -> Result<(), PublishError> {
+        self.archive.publish(
+            uri.as_ref(),
+            &RrdpObjectMeta::from_content(content),
+            content
+        )
+    }
+
+    /// Updates an object in the archive.
+    pub fn update_object(
+        &mut self,
+        uri: &uri::Rsync,
+        hash: rrdp::Hash,
+        content: &[u8]
+    ) -> Result<(), AccessError> {
+        Ok(self.archive.update(
+            uri.as_ref(),
+            &RrdpObjectMeta::from_content(content),
+            content,
+            |meta| {
+                if meta.hash == hash {
+                    Ok(())
+                }
+                else {
+                    Err(HashMismatch)
+                }
+            }
+        )?)
+    }
+
+    /// Deletes an object from the archive.
+    pub fn delete_object(
+        &mut self, uri: &uri::Rsync, hash: rrdp::Hash,
+    ) -> Result<(), AccessError> {
+        Ok(self.archive.delete(
+            uri.as_ref(),
+            |meta| {
+                if meta.hash == hash {
+                    Ok(())
+                }
+                else {
+                    Err(HashMismatch)
+                }
+            }
+        )?)
+    }
+
+    pub fn publish_state(
+        &mut self, state: &RepositoryState
+    ) -> Result<(), RunFailed> {
+        let mut buf = Vec::new();
+        state.compose(&mut buf).expect("writing to vec failed");
+        self.archive.publish(
+            b"state", &Default::default(), &buf
+        ).map_err(|err| match err {
+            archive::PublishError::Archive(ArchiveError::Io(err)) => {
+                error!(
+                    "Fatal: Failed write to RRDP repository archive {}: {}",
+                    self.path.display(), err
+                );
+                RunFailed::fatal()
+            }
+            _ => {
+                warn!(
+                    "Failed to write local RRDP repository state in {}.",
+                    self.path.display()
+                );
+                RunFailed::retry()
+            }
+        })
+    }
+
+    pub fn update_state(
+        &mut self, state: &RepositoryState
+    ) -> Result<(), RunFailed> {
+        let mut buf = Vec::new();
+        state.compose(&mut buf).expect("writing to vec failed");
+        self.archive.update(
+            b"state", &Default::default(), &buf,
+            |_| Ok(())
+        ).map_err(|err| match err {
+            archive::AccessError::Archive(ArchiveError::Io(err)) => {
+                error!(
+                    "Fatal: Failed write to RRDP repository archive {}: {}",
+                    self.path.display(), err
+                );
+                RunFailed::fatal()
+            }
+            _ => {
+                warn!(
+                    "Failed to update local RRDP repository state in {}.",
+                    self.path.display()
+                );
+                RunFailed::retry()
+            }
+        })
+    }
+}
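Taken together, the write side mirrors RRDP element semantics: `publish` must create a new object, while `update` and `delete` must match the hash the delta claims the current object has. A hypothetical flow (not part of the patch):

```rust
// Publish a new object, replace it after a hash check, then delete it.
fn lifecycle(archive: &mut RrdpArchive, uri: &rpki::uri::Rsync) {
    let v1 = b"object content, version 1";
    archive.publish_object(uri, v1).expect("must not exist yet");

    // A delta's update element carries the hash of the *current* object.
    let hash_v1 = rpki::rrdp::Hash::from_data(v1);
    archive.update_object(uri, hash_v1, b"version 2").expect("hash matches");

    let hash_v2 = rpki::rrdp::Hash::from_data(b"version 2");
    archive.delete_object(uri, hash_v2).expect("hash matches");
}
```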
+
+
+//------------ archive_err ---------------------------------------------------
+
+fn archive_err(err: ArchiveError, path: &Path) -> RunFailed {
+    match err {
+        ArchiveError::Corrupt => {
+            warn!(
+                "RRDP repository file '{}' is corrupt. \
+                 Deleting and starting again.",
+                path.display()
+            );
+            match fs::remove_file(path) {
+                Ok(()) => {
+                    RunFailed::retry()
+                }
+                Err(err) => {
+                    warn!(
+                        "Deleting RRDP repository archive '{}' failed: {}",
+                        path.display(),
+                        err
+                    );
+                    RunFailed::fatal()
+                }
+            }
+        }
+        ArchiveError::Io(err) => {
+            error!(
+                "Fatal: Failed to access RRDP repository archive '{}': {}",
+                path.display(),
+                err
+            );
+            RunFailed::fatal()
+        }
+    }
+}
+
+
+//------------ RrdpObjectMeta ------------------------------------------------
+
+/// The meta data for an RRDP object.
+#[derive(Clone, Copy, Debug)]
+pub struct RrdpObjectMeta {
+    hash: rrdp::Hash,
+}
+
+impl Default for RrdpObjectMeta {
+    fn default() -> Self {
+        Self {
+            hash: [0; 32].into(),
+        }
+    }
+}
+
+impl RrdpObjectMeta {
+    pub fn from_content(content: &[u8]) -> Self {
+        Self {
+            hash: rrdp::Hash::from_data(content)
+        }
+    }
+}
+
+impl archive::ObjectMeta for RrdpObjectMeta {
+    const SIZE: usize = 32;
+
+    type ConsistencyError = HashMismatch;
+
+    fn write(
+        &self, write: &mut archive::StorageWrite
+    ) -> Result<(), ArchiveError> {
+        write.write(self.hash.as_slice())
+    }
+
+    fn read(
+        read: &mut archive::StorageRead
+    ) -> Result<Self, ArchiveError> {
+        Ok(Self { hash: read.read_array()?.into() })
+    }
+}
+
+
+//------------ RepositoryState -----------------------------------------------
+
+/// The current state of an RRDP repository.
+#[derive(Clone, Debug, Eq, Hash, PartialEq)]
+pub struct RepositoryState {
+    /// The rpkiNotify URI of the repository.
+    pub rpki_notify: uri::Https,
+
+    /// The UUID of the current session of the repository.
+    pub session: Uuid,
+
+    /// The serial number within the current session.
+    pub serial: u64,
+
+    /// Unix timestamp in seconds of the time of last update of the server.
+    ///
+    /// We are not using `DateTime<Utc>` here since we don’t need sub-second
+    /// precision and converting on the fly makes a value change when cycled
+    /// through the database as its sub-second portion is forced to zero.
+    pub updated_ts: i64,
+
+    /// The time when we consider the stored data to be expired.
+    pub best_before_ts: i64,
+
+    /// The value of the date header of the notification file if present.
+    ///
+    /// Given as the Unix timestamp in seconds.
+    pub last_modified_ts: Option<i64>,
+
+    /// The value of the ETag header of the notification file if present.
+    ///
+    /// This is the complete tag including the quotation marks and possibly
+    /// the weak prefix.
+    pub etag: Option<Bytes>,
+}
+
+impl RepositoryState {
+    /// Reads the state from an IO reader.
+    fn parse(reader: &mut impl io::Read) -> Result<Self, io::Error> {
+        // Version number. Must be 0u8.
+        let version = u8::parse(reader)?;
+        if version != 0 {
+            return Err(io::Error::new(
+                io::ErrorKind::Other,
+                format!("unexpected version {}", version)
+            ))
+        }
+
+        Ok(RepositoryState {
+            rpki_notify: Parse::parse(reader)?,
+            session: Parse::parse(reader)?,
+            serial: Parse::parse(reader)?,
+            updated_ts: Parse::parse(reader)?,
+            best_before_ts: Parse::parse(reader)?,
+            last_modified_ts: Parse::parse(reader)?,
+            etag: Parse::parse(reader)?,
+        })
+    }
+
+    /// Composes the encoded state.
+    fn compose(&self, writer: &mut impl io::Write) -> Result<(), io::Error> {
+        0u8.compose(writer)?; // version
+        self.rpki_notify.compose(writer)?;
+        self.session.compose(writer)?;
+        self.serial.compose(writer)?;
+        self.updated_ts.compose(writer)?;
+        self.best_before_ts.compose(writer)?;
+        self.last_modified_ts.compose(writer)?;
+        self.etag.compose(writer)?;
+        Ok(())
+    }
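Since `parse` rejects anything but version 0, the encoding is self-checking in the simplest way. A round-trip sketch (illustrative only; `parse` and `compose` are private to this module):

```rust
// Compose into a buffer, parse it back, and expect full consumption.
fn roundtrip(state: &RepositoryState) {
    let mut buf = Vec::new();
    state.compose(&mut buf).expect("writing to a vec cannot fail");
    let mut slice = buf.as_slice();
    let back = RepositoryState::parse(&mut slice).expect("just encoded");
    assert_eq!(*state, back);
    assert!(slice.is_empty());
}
```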
+
+    /// Returns the last update time as a proper timestamp.
+    ///
+    /// Returns `None` if the time cannot be converted into a timestamp for
+    /// some reason.
+    pub fn updated(&self) -> Option<DateTime<Utc>> {
+        Utc.timestamp_opt(self.updated_ts, 0).single()
+    }
+
+    /// Returns the best before time as a proper timestamp.
+    ///
+    /// Returns `None` if the time cannot be converted into a timestamp for
+    /// some reason.
+    pub fn best_before(&self) -> Option<DateTime<Utc>> {
+        Utc.timestamp_opt(self.best_before_ts, 0).single()
+    }
+
+    /// Sets the update time to now.
+    pub fn touch(&mut self, fallback: FallbackTime) {
+        self.updated_ts = Utc::now().timestamp();
+        self.best_before_ts = fallback.best_before().timestamp();
+    }
+
+    /// Returns whether this repository should be considered expired.
+    ///
+    /// If in doubt, this will return `true`.
+    pub fn is_expired(&self) -> bool {
+        match self.best_before() {
+            Some(best_before) => Utc::now() > best_before,
+            None => true,
+        }
+    }
+
+    /// Returns the last modified time.
+    ///
+    /// Returns `None` if we do not have a last modified time or if
+    /// it cannot be converted from a Unix timestamp into a date-time.
+    pub fn last_modified(&self) -> Option<DateTime<Utc>> {
+        self.last_modified_ts.and_then(|ts| Utc.timestamp_opt(ts, 0).single())
+    }
+}
+
+
+//------------ FallbackTime --------------------------------------------------
+
+/// Parameters for calculating the best-before time of repositories.
+#[derive(Clone, Copy, Debug)]
+pub struct FallbackTime {
+    min: Duration,
+    max: Duration,
+}
+
+impl FallbackTime {
+    /// Creates a new value from the configuration.
+    pub fn from_config(config: &Config) -> Self {
+        FallbackTime {
+            min: config.refresh,
+            max: cmp::max(2 * config.refresh, config.rrdp_fallback_time)
+        }
+    }
+
+    /// Picks a best-before date for a repository updated around now.
+    pub fn best_before(self) -> DateTime<Utc> {
+        // Saturating conversion between std’s and chrono’s Duration types.
+        Utc::now() + chrono::Duration::from_std(
+            rand::thread_rng().gen_range(self.min..self.max)
+        ).unwrap_or_else(|_| chrono::Duration::milliseconds(i64::MAX))
+    }
+}
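The uniform pick between `min` and `max` spreads expiry times so that many repositories do not all fall back to a snapshot at once. With hypothetical configuration values, the window looks like this (sketch, not part of the patch):

```rust
use std::time::Duration;
use rand::Rng;

fn main() {
    // Assumed config: refresh = 10 min, rrdp-fallback-time = 1 h.
    let min = Duration::from_secs(10 * 60);
    let max = Duration::from_secs(60 * 60);
    // Each repository gets its own random expiry in [min, max).
    let picked = rand::thread_rng().gen_range(min..max);
    println!("data considered expired after {} s", picked.as_secs());
}
```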
+
+
+//============ Errors ========================================================
+
+//------------ HashMismatch --------------------------------------------------
+
+#[derive(Debug)]
+pub struct HashMismatch;
+
+
+//------------ AccessError ---------------------------------------------------
+
+/// An error happened while publishing an object.
+#[derive(Debug)]
+pub enum AccessError {
+    /// The object does not exist.
+    NotFound,
+
+    /// The object’s hash is wrong.
+    HashMismatch,
+
+    /// An error happened while trying to access the archive.
+    Archive(ArchiveError),
+}
+
+impl From<archive::AccessError<HashMismatch>> for AccessError {
+    fn from(err: archive::AccessError<HashMismatch>) -> Self {
+        match err {
+            archive::AccessError::NotFound => AccessError::NotFound,
+            archive::AccessError::Inconsistent(_) => AccessError::HashMismatch,
+            archive::AccessError::Archive(err) => AccessError::Archive(err),
+        }
+    }
+}
+
diff --git a/src/collector/rrdp/base.rs b/src/collector/rrdp/base.rs
new file mode 100644
index 00000000..1666e8d5
--- /dev/null
+++ b/src/collector/rrdp/base.rs
@@ -0,0 +1,1013 @@
+use std::{cmp, fs, io};
+use std::collections::{HashMap, HashSet};
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Weak};
+use std::time::SystemTime;
+use bytes::Bytes;
+use log::{debug, error, info, warn};
+use rpki::uri;
+use rpki::crypto::DigestAlgorithm;
+use rpki::rrdp::{DeltaInfo, NotificationFile};
+use tempfile::NamedTempFile;
+use crate::config::Config;
+use crate::error::{Fatal, RunFailed};
+use crate::metrics::{Metrics, RrdpRepositoryMetrics};
+use crate::utils::fatal;
+use crate::utils::archive::{ArchiveError, OpenError};
+use crate::utils::dump::DumpRegistry;
+use crate::utils::json::JsonBuilder;
+use crate::utils::sync::{Mutex, RwLock};
+use crate::utils::uri::UriExt;
+use super::archive::{FallbackTime, RrdpArchive, RepositoryState};
+use super::http::{HttpClient, HttpStatus};
+use super::update::{
+    DeltaUpdate, Notification, SnapshotError, SnapshotReason, SnapshotUpdate
+};
+
+
+//------------ Collector -----------------------------------------------------
+
+/// The local copy of RPKI repositories synchronized via RRDP.
+#[derive(Debug)]
+pub struct Collector {
+    /// The path of the directory we store all our data in.
+    working_dir: PathBuf,
+
+    /// The HTTP client.
+    http: HttpClient,
+
+    /// Various configuration options.
+    config: RrdpConfig,
+}
+
+impl Collector {
+    /// Initializes the RRDP collector without creating a value.
+    ///
+    /// This function is called implicitly by [`new`][Collector::new].
+    pub fn init(config: &Config) -> Result<(), Fatal> {
+        let _ = Self::create_working_dir(config)?;
+        Ok(())
+    }
+
+    /// Creates the working dir and returns its path.
+    fn create_working_dir(config: &Config) -> Result<PathBuf, Fatal> {
+        let working_dir = config.cache_dir.join("rrdp");
+
+        if config.fresh {
+            if let Err(err) = fs::remove_dir_all(&working_dir) {
+                if err.kind() != io::ErrorKind::NotFound {
+                    error!(
+                        "Failed to delete RRDP working directory at {}: {}",
+                        working_dir.display(), err
+                    );
+                    return Err(Fatal)
+                }
+            }
+        }
+
+        if let Err(err) = fs::create_dir_all(&working_dir) {
+            error!(
+                "Failed to create RRDP working directory {}: {}.",
+                working_dir.display(), err
+            );
+            return Err(Fatal);
+        }
+        Ok(working_dir)
+    }
+
+    /// Creates a new RRDP collector.
+    ///
+    /// Returns `Ok(None)` if RRDP was disabled.
+    pub fn new(config: &Config) -> Result<Option<Self>, Fatal> {
+        if config.disable_rrdp {
+            return Ok(None)
+        }
+        Ok(Some(Self {
+            working_dir: Self::create_working_dir(config)?,
+            http: HttpClient::new(config)?,
+            config: config.into(),
+        }))
+    }
+
+    pub fn ignite(&mut self) -> Result<(), Fatal> {
+        self.http.ignite()
+    }
+
+    /// Sanitizes the stored data.
+    ///
+    /// Validates all repository archives and deletes those that are corrupt.
+    pub fn sanitize(&self) -> Result<(), Fatal> {
+        for entry in fatal::read_dir(&self.working_dir)? {
+            let entry = entry?;
+            if !entry.is_dir() || entry.file_name() == "tmp" {
+                continue;
+            }
+            for entry in fatal::read_dir(entry.path())? {
+                let entry = entry?;
+                if !entry.is_file() {
+                    continue;
+                }
+                match RrdpArchive::verify(entry.path()) {
+                    Ok(()) | Err(OpenError::NotFound) => { }
+                    Err(OpenError::Archive(ArchiveError::Io(err))) => {
+                        error!(
+                            "Fatal: Failed to read RRDP repository archive\
+                            {}: {}",
+                            entry.path().display(), err
+                        );
+                        return Err(Fatal)
+                    }
+                    Err(OpenError::Archive(ArchiveError::Corrupt)) => {
+                        match fs::remove_file(entry.path()) {
+                            Ok(()) => {
+                                info!(
+                                    "Deleting corrupt RRDP repository \
+                                     archive {}.",
+                                    entry.path().display()
+                                );
+                            }
+                            Err(err) => {
+                                error!(
+                                    "Fatal: Failed to delete corrupt RRDP \
+                                     repository archive {}: {}.",
+                                    entry.path().display(), err
+                                );
+                                return Err(Fatal)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    pub fn start(&self) -> Run {
+        Run::new(self)
+    }
+
+    #[allow(clippy::mutable_key_type)]
+    pub fn dump(&self, dir: &Path) -> Result<(), Fatal> {
+        let dir = dir.join("rrdp");
+        debug!("Dumping RRDP collector content to {}", dir.display());
+        let mut registry = DumpRegistry::new(dir);
+        let mut states = HashMap::new();
+        for entry in fatal::read_dir(&self.working_dir)? {
+            let entry = entry?;
+            if !entry.is_dir() || entry.file_name() == "tmp" {
+                continue;
+            }
+            for entry in fatal::read_dir(entry.path())? {
+                let entry = entry?;
+                if entry.is_file() {
+                    if let Err(err) = self.dump_repository(
+                        entry.into_path().into(), &mut registry, &mut states
+                    ) {
+                        if err.is_fatal() {
+                            return Err(Fatal)
+                        }
+                    }
+                }
+            }
+        }
+        self.dump_repository_json(registry, states)?;
+        debug!("RRDP collector dump complete.");
+        Ok(())
+    }
+
+    /// Dumps the content of an RRDP repository.
+    #[allow(clippy::mutable_key_type)]
+    fn dump_repository(
+        &self,
+        repo_path: Arc<PathBuf>,
+        registry: &mut DumpRegistry,
+        state_registry: &mut HashMap<uri::Https, RepositoryState>,
+    ) -> Result<(), RunFailed> {
+        let archive = RrdpArchive::open(repo_path.clone())?;
+        let state = archive.load_state()?;
+        let target_path = registry.get_repo_path(Some(&state.rpki_notify));
+        let object_path = target_path.join("rsync");
+
+        for item in archive.objects()? {
+            let (uri, data) = item?;
+            let path = object_path.join(
+                uri.canonical_module().as_ref()
+            ).join(uri.path());
+            fatal::create_parent_all(&path)?;
+            fatal::write_file(&path, &data)?;
+        }
+
+        state_registry.insert(state.rpki_notify.clone(), state);
+        Ok(())
+    }
+
+    /// Dumps the repositories.json.
+    #[allow(clippy::mutable_key_type)]
+    fn dump_repository_json(
+        &self,
+        repos: DumpRegistry,
+        states: HashMap<uri::Https, RepositoryState>,
+    ) -> Result<(), Fatal> {
+        let path = repos.base_dir().join("repositories.json");
+        if let Err(err) = fs::write(
+            &path,
+            JsonBuilder::build(|builder| {
+                builder.member_array("repositories", |builder| {
+                    for (key, value) in repos.rrdp_uris() {
+                        builder.array_object(|builder| {
+                            builder.member_str(
+                                "path", value
+                            );
+                            builder.member_str("type", "rrdp");
+                            builder.member_str(
+                                "rpkiNotify",
+                                key
+                            );
+
+                            if let Some(state) = states.get(key) {
+                                builder.member_raw("serial", state.serial);
+                                builder.member_str("session", state.session);
+                                if let Some(updated) = state.updated() {
+                                    builder.member_str(
+                                        "updated",
+                                        updated.to_rfc3339()
+                                    );
+                                }
+                            }
+                        })
+                    }
+                    builder.array_object(|builder| {
+                        builder.member_str("path", "rsync");
+                        builder.member_str("type", "rsync");
+                    });
+                })
+            })
+        ) {
+            error!("Failed to write {}: {}", path.display(), err);
+            return Err(Fatal)
+        }
+
+        Ok(())
+    }
+}
+
+impl Collector {
+    /// Returns the path for a repository.
+    fn repository_path(
+        &self, rpki_notify: &uri::Https
+    ) -> Result<PathBuf, Fatal> {
+        let mut path = self.working_dir.clone();
+        path.push(rpki_notify.canonical_authority().as_ref());
+        if let Err(err) = fs::create_dir_all(&path) {
+            error!(
+                "Failed to create RRDP archive directory {}: {}",
+                path.display(), err
+            );
+            return Err(Fatal)
+        }
+
+        let alg = DigestAlgorithm::sha256();
+        let mut dir = String::with_capacity(
+            alg.digest_len()
+            + 4 // ".bin"
+        );
+        crate::utils::str::append_hex(
+            alg.digest(rpki_notify.as_slice()).as_ref(),
+            &mut dir
+        );
+        dir.push_str(".bin");
+        path.push(&dir);
+        Ok(path)
+    }
+
+    fn temp_file(
+        &self
+    ) -> Result<(fs::File, Arc<PathBuf>), Fatal> {
+        let base = self.working_dir.join("tmp");
+        if let Err(err) = fs::create_dir_all(&base) {
+            error!(
+                "Failed to create RRDP temporary directory {}: {}",
+                base.display(), err
+            );
+            return Err(Fatal)
+        }
+        let file = match NamedTempFile::new_in(&base) {
+            Ok(file) => file,
+            Err(err) => {
+                error!(
+                    "Failed to create temporary RRDP file in {}: {}",
+                    base.display(), err
+                );
+                return Err(Fatal)
+            }
+        };
+        let (file, path) = file.keep().map_err(|err| {
+            error!(
+                "Failed to create temporary RRDP file {}: {}",
+                err.file.path().display(), err.error
+            );
+            Fatal
+        })?;
+        Ok((file, path.into()))
+    }
+
+    pub(super) fn http(&self) -> &HttpClient {
+        &self.http
+    }
+
+    pub(super) fn config(&self) -> &RrdpConfig {
+        &self.config
+    }
+}
+
+
+//------------ Run -----------------------------------------------------------
+
+/// Using the collector for a single validation run.
+#[derive(Debug)]
+pub struct Run<'a> {
+    /// A reference to the underlying collector.
+    collector: &'a Collector,
+
+    /// A set of the repositories we have updated already.
+    updated: RwLock<HashMap<uri::Https, LoadResult<Repository>>>,
+
+    /// The modules that are currently being updated.
+    ///
+    /// The value in the map is a mutex that is used to synchronize competing
+    /// attempts to update the module. Only the thread that has the mutex is
+    /// allowed to actually update.
+    running: RwLock<HashMap<uri::Https, Arc<Mutex<()>>>>,
+
+    /// The server metrics.
+    metrics: Mutex<Vec<RrdpRepositoryMetrics>>,
+}
+
+impl<'a> Run<'a> {
+    /// Creates a new runner.
+    fn new(collector: &'a Collector) -> Self {
+        Self {
+            collector,
+            updated: Default::default(),
+            running: Default::default(),
+            metrics: Default::default(),
+        }
+    }
+
+    /// Loads a trust anchor certificate identified by an HTTPS URI.
+    ///
+    /// This just downloads the file. It is not cached since that is done
+    /// by the store anyway.
+    pub fn load_ta(&self, uri: &uri::Https) -> Option<Bytes> {
+        let mut response = match self.collector.http.response(uri, false) {
+            Ok(response) => response,
+            Err(_) => return None,
+        };
+        if response.content_length() > self.collector.config().max_object_size {
+            warn!(
+                "Trust anchor certificate {} exceeds size limit. \
+                 Ignoring.",
+                uri
+            );
+            return None
+        }
+        let mut bytes = Vec::new();
+        if let Err(err) = response.copy_to(&mut bytes) {
+            info!("Failed to get trust anchor {}: {}", uri, err);
+            return None
+        }
+        Some(Bytes::from(bytes))
+    }
+
+    /// Returns whether an RRDP repository has been updated already.
+    ///
+    /// This does not mean the repository is actually up-to-date or even
+    /// available as an update may have failed.
+    pub fn was_updated(&self, rpki_notify: &uri::Https) -> bool {
+        self.updated.read().contains_key(rpki_notify)
+    }
+
+    /// Accesses an RRDP repository.
+    ///
+    /// This method blocks if the repository is deemed to need updating until
+    /// the update has finished.
+    ///
+    /// Returns the result of the update of the repository and whether this
+    /// is the first attempt at updating the repository.
+    pub fn load_repository(
+        &self, rpki_notify: &uri::Https
+    ) -> Result<(LoadResult, bool), RunFailed> {
+        // If we already tried updating, we can return already.
+        if let Some(repo) = self.updated.read().get(rpki_notify) {
+            return Ok((repo.read()?, false))
+        }
+
+        // Get a clone of the (arc-ed) mutex. Make a new one if there isn’t
+        // yet.
+        let mutex = {
+            self.running.write()
+                .entry(rpki_notify.clone()).or_default()
+                .clone()
+        };
+
+        // Acquire the mutex. Once we have it, see if the repository is
+        // up-to-date which happens if someone else had the mutex first.
+        let _lock = mutex.lock();
+        if let Some(repo) = self.updated.read().get(rpki_notify) {
+            self.running.write().remove(rpki_notify);
+            return Ok((repo.read()?, false))
+        }
+
+        // Now we can update the repository. But we only do this if we like
+        // the URI.
+        let (repo, metrics) = if
+            self.collector.config().filter_dubious
+            && rpki_notify.has_dubious_authority()
+        {
+            let mut metrics = RrdpRepositoryMetrics::new(rpki_notify.clone());
+            metrics.notify_status = HttpStatus::Rejected;
+            warn!(
+                "{}: Dubious host name. Not using the repository.",
+                rpki_notify
+            );
+            (LoadResult::Unavailable, metrics)
+        }
+        else {
+            RepositoryUpdate::new(
+                self.collector, rpki_notify
+            )?.try_update()?
+        };
+
+        // Insert metrics.
+        self.metrics.lock().push(metrics);
+
+        let res = repo.read()?;
+
+        // Insert into updated map.
+        self.updated.write().insert(rpki_notify.clone(), repo);
+
+        // Remove from running.
+        self.running.write().remove(rpki_notify);
+
+        Ok((res, true))
+    }
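The method above is a double-checked pattern around a map of per-repository mutexes. Reduced to its core (a standalone sketch with std types; the patch itself uses the crate's own `Mutex`/`RwLock` wrappers, so names here are illustrative):

```rust
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::{Arc, Mutex, RwLock};

struct UpdateOnce<K, V> {
    updated: RwLock<HashMap<K, V>>,
    running: RwLock<HashMap<K, Arc<Mutex<()>>>>,
}

impl<K: Eq + Hash + Clone, V: Clone> UpdateOnce<K, V> {
    fn load(&self, key: &K, update: impl FnOnce() -> V) -> V {
        // Fast path: someone already updated this key.
        if let Some(v) = self.updated.read().unwrap().get(key) {
            return v.clone()
        }
        // Take (or create) the per-key mutex, then re-check under it.
        let mutex = self.running.write().unwrap()
            .entry(key.clone()).or_default().clone();
        let _lock = mutex.lock().unwrap();
        if let Some(v) = self.updated.read().unwrap().get(key) {
            return v.clone() // someone else got here first
        }
        let v = update();
        self.updated.write().unwrap().insert(key.clone(), v.clone());
        self.running.write().unwrap().remove(key);
        v
    }
}
```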
+
+    #[allow(clippy::mutable_key_type)]
+    pub fn cleanup(
+        &self,
+        retain: &mut HashSet<uri::Https>
+    ) -> Result<(), Fatal> {
+        // Add all the RRDP repositories we’ve tried during this run to be
+        // kept.
+        for uri in self.updated.read().keys() {
+            retain.insert(uri.clone());
+        }
+
+        for entry in fatal::read_dir(&self.collector.working_dir)? {
+            let entry = entry?;
+            if entry.is_file() {
+                // This isn’t supposed to be here. Make it go away.
+                if let Err(err) = fs::remove_file(entry.path()) {
+                    error!(
+                        "Fatal: failed to delete stray file {}: {}",
+                        entry.path().display(), err
+                    );
+                    return Err(Fatal)
+                }
+            }
+            else if entry.is_dir() {
+                if entry.file_name() == "tmp" {
+                    self.cleanup_tmp(entry.path())?
+                }
+                else {
+                    self.cleanup_authority(entry.path(), retain)?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Cleans up the temporary directory.
+    pub fn cleanup_tmp(
+        &self,
+        path: &Path,
+    ) -> Result<(), Fatal> {
+        for entry in fatal::read_dir(path)? {
+            let entry = entry?;
+            if entry.is_file() {
+                if let Err(err) = fs::remove_file(entry.path()) {
+                    error!(
+                        "Fatal: failed to delete file {}: {}",
+                        entry.path().display(), err
+                    );
+                    return Err(Fatal)
+                }
+            }
+            else if let Err(err) = fs::remove_dir_all(entry.path()) {
+                error!(
+                    "Fatal: failed to delete directory {}: {}",
+                    entry.path().display(), err
+                );
+                return Err(Fatal)
+            }
+        }
+        Ok(())
+    }
+
+    /// Cleans up an authority directory.
+    #[allow(clippy::mutable_key_type)]
+    pub fn cleanup_authority(
+        &self,
+        path: &Path,
+        retain: &HashSet<uri::Https>
+    ) -> Result<(), Fatal> {
+        for entry in fatal::read_dir(path)? {
+            let entry = entry?;
+            if entry.is_file() {
+                let entry_path = Arc::new(entry.into_path());
+                let keep = match self.keep_repository(
+                    entry_path.clone(), retain
+                ) {
+                    Ok(some) => some,
+                    Err(err) if err.should_retry() => false,
+                    Err(_) => return Err(Fatal),
+                };
+                if !keep {
+                    if let Err(err) = fs::remove_file(entry_path.as_ref()) {
+                        error!(
+                            "Fatal: failed to delete file {}: {}",
+                            entry_path.display(), err
+                        );
+                        return Err(Fatal)
+                    }
+                }
+            }
+            else {
+                // This isn’t supposed to be here. Make it go away.
+                if let Err(err) = fs::remove_dir_all(entry.path()) {
+                    error!(
+                        "Fatal: failed to delete stray directory {}: {}",
+                        entry.path().display(), err
+                    );
+                    return Err(Fatal)
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Returns whether we should keep a repository.
+    #[allow(clippy::mutable_key_type)]
+    pub fn keep_repository(
+        &self,
+        path: Arc<PathBuf>,
+        retain: &HashSet<uri::Https>
+    ) -> Result<bool, RunFailed> {
+        let archive = RrdpArchive::open(path)?;
+        let state = archive.load_state()?;
+        Ok(retain.contains(&state.rpki_notify))
+    }
+
+    pub fn done(self, metrics: &mut Metrics) {
+        metrics.rrdp = self.metrics.into_inner()
+    }
+}
+
+
+//------------ RrdpConfig ----------------------------------------------------
+
+/// The configuration of the RRDP collector.
+#[derive(Clone, Debug)]
+pub struct RrdpConfig {
+    /// Whether to filter dubious authorities in notify URIs.
+    pub filter_dubious: bool,
+
+    /// RRDP repository fallback timeout.
+    ///
+    /// This is the time since the last known update of an RRDP repository
+    /// before it is considered non-existent.
+    pub fallback_time: FallbackTime,
+
+    /// The maximum allowed size for published objects.
+    pub max_object_size: Option<u64>,
+
+    /// The maximum number of deltas we process before using a snapshot.
+    pub max_delta_count: usize,
+}
+
+impl<'a> From<&'a Config> for RrdpConfig {
+    fn from(config: &'a Config) -> Self {
+        Self {
+            filter_dubious: !config.allow_dubious_hosts,
+            fallback_time: FallbackTime::from_config(config),
+            max_object_size: config.max_object_size,
+            max_delta_count: config.rrdp_max_delta_count,
+        }
+    }
+}
+
+
+//------------ LoadResult ----------------------------------------------------
+
+/// The result of trying to load a repository.
+#[derive(Clone, Debug)]
+pub enum LoadResult<Repo = Arc<ReadRepository>> {
+    /// The update failed and there is no local copy.
+    Unavailable,
+
+    /// The update failed and any content should now be considered stale.
+    Stale,
+
+    /// The update failed but content should not be considered stale yet.
+    Current,
+
+    /// The repository was successfully updated.
+    Updated(Repo),
+}
+
+impl LoadResult<Repository> {
+    fn read(&self) -> Result<LoadResult, RunFailed> {
+        match self {
+            Self::Unavailable => Ok(LoadResult::Unavailable),
+            Self::Stale => Ok(LoadResult::Stale),
+            Self::Current => Ok(LoadResult::Current),
+            Self::Updated(repo) => Ok(LoadResult::Updated(repo.read()?)),
+        }
+    }
+}
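How a consumer might act on each variant (a hypothetical match, not part of the patch):

```rust
// The three failure variants differ only in how trustworthy previously
// stored data still is; only Updated carries fresh content.
fn describe(res: &LoadResult) -> &'static str {
    match res {
        LoadResult::Unavailable => "update failed, no local copy to fall back to",
        LoadResult::Stale => "update failed, stored data past its best-before",
        LoadResult::Current => "update failed, stored data still usable",
        LoadResult::Updated(_) => "fresh data from this validation run",
    }
}
```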
+
+
+//------------ ReadRepository ------------------------------------------------
+
+/// Read access to a single RRDP repository.
+#[derive(Debug)]
+pub struct ReadRepository {
+    /// The archive for the repository.
+    archive: RrdpArchive,
+}
+
+impl ReadRepository {
+    fn new(repository: &Repository) -> Result<Self, RunFailed> {
+        Ok(Self {
+            archive: RrdpArchive::open(repository.path.clone())?,
+        })
+    }
+
+    /// Loads an object from the repository.
+    ///
+    /// The object is identified by its rsync URI. If the object doesn’t
+    /// exist, returns `None`.
+    pub fn load_object(
+        &self,
+        uri: &uri::Rsync
+    ) -> Result<Option<Bytes>, RunFailed> {
+        self.archive.load_object(uri)
+    }
+}
+
+
+//------------ Repository ----------------------------------------------------
+
+/// A single RRDP repository.
+#[derive(Debug)]
+struct Repository {
+    /// The path where everything from this repository lives.
+    path: Arc<PathBuf>,
+
+    /// A reader for the repository.
+    ///
+    /// This is a weak arc so it gets dropped if nobody is using it any more.
+    read: Mutex<Weak<ReadRepository>>,
+}
+
+impl Repository {
+    fn new(path: impl Into<Arc<PathBuf>>) -> Self {
+        Self {
+            path: path.into(),
+            read: Mutex::new(Weak::new())
+        }
+    }
+
+    pub fn read(&self) -> Result<Arc<ReadRepository>, RunFailed> {
+        let mut read = self.read.lock();
+        if let Some(res) = read.upgrade() {
+            return Ok(res)
+        }
+        let res = Arc::new(ReadRepository::new(self)?);
+        *read = Arc::downgrade(&res);
+        Ok(res)
+    }
+}
+
+
+//------------ RepositoryUpdate ----------------------------------------------
+
+/// All the state necessary to update a repository.
+struct RepositoryUpdate<'a> {
+    collector: &'a Collector,
+    path: Arc<PathBuf>,
+    rpki_notify: &'a uri::Https,
+    metrics: RrdpRepositoryMetrics,
+}
+
+impl<'a> RepositoryUpdate<'a> {
+    fn new(
+        collector: &'a Collector, rpki_notify: &'a uri::Https,
+    ) -> Result<Self, RunFailed> {
+        Ok(Self {
+            collector,
+            path: Arc::new(collector.repository_path(rpki_notify)?),
+            rpki_notify,
+            metrics: RrdpRepositoryMetrics::new(rpki_notify.clone()),
+        })
+    }
+
+    /// Creates the repository by trying to update it.
+    fn try_update(
+        mut self
+    ) -> Result<(LoadResult<Repository>, RrdpRepositoryMetrics), RunFailed> {
+        let current = match RrdpArchive::try_open(self.path.clone()) {
+            Ok(Some(archive)) => {
+                let state = archive.load_state()?;
+                Some((archive, state))
+            }
+            Ok(None) => None,
+            Err(err) => {
+                if err.should_retry() {
+                    // RrdpArchive::try_open should already have deleted the
+                    // file, so we can happily pretend it never existed.
+                    None
+                }
+                else {
+                    return Err(err)
+                }
+            }
+        };
+
+        let start_time = SystemTime::now();
+        let is_current = match current.as_ref() {
+            Some(current) => !current.1.is_expired(),
+            None => false,
+        };
+        let best_before = current.as_ref().and_then(|current|
+            current.1.best_before()
+        );
+
+        let is_updated = self.update(current)?;
+
+        self.metrics.duration = SystemTime::now().duration_since(start_time);
+
+        let res = if is_updated {
+            LoadResult::Updated(Repository::new(self.path))
+        }
+        else if is_current {
+            LoadResult::Current
+        }
+        else if let Some(date) = best_before {
+            info!(
+                "RRDP {}: Update failed and \
+                 current copy is expired since {}.",
+                self.rpki_notify, date
+            );
+            LoadResult::Stale
+        }
+        else {
+            info!(
+                "RRDP {}: Update failed and there is no current copy.",
+                self.rpki_notify
+            );
+            LoadResult::Unavailable
+        };
+        Ok((res, self.metrics))
+    }
+
+    /// Performs the actual update.
+    ///
+    /// Returns `Ok(false)` if the update failed.
+    fn update(
+        &mut self,
+        current: Option<(RrdpArchive, RepositoryState)>,
+    ) -> Result<bool, RunFailed> {
+        let notify = match Notification::get(
+            &self.collector.http, self.rpki_notify,
+            current.as_ref().map(|x| &x.1),
+            &mut self.metrics.notify_status,
+        ) {
+            Ok(Some(notify)) => notify,
+            Ok(None) => {
+                self.not_modified(current)?;
+                return Ok(true)
+            }
+            Err(_) => return Ok(false)
+        };
+
+        self.metrics.serial = Some(notify.content().serial());
+        self.metrics.session = Some(notify.content().session_id());
+
+        if let Some((archive, state)) = current {
+            match self.delta_update(&notify, archive, state)? {
{ + None => { + return Ok(true) + } + Some(reason) => { + self.metrics.snapshot_reason = Some(reason) + } + } + } + else { + self.metrics.snapshot_reason = Some(SnapshotReason::NewRepository); + } + self.snapshot_update(¬ify) + } + + /// Handle the case of a Not Modified response. + fn not_modified( + &mut self, + current: Option<(RrdpArchive, RepositoryState)>, + ) -> Result<(), RunFailed> { + info!("RRDP {}: Not modified.", self.rpki_notify); + if let Some((mut archive, mut state)) = current { + state.touch(self.collector.config().fallback_time); + archive.update_state(&state)?; + } + Ok(()) + } + + /// Performs a snapshot update and returns whether that succeeded. + /// + /// The URI and expected meta-data of the snapshot file are taken from + /// `notify`. + fn snapshot_update( + &mut self, + notify: &Notification, + ) -> Result { + debug!("RRDP {}: updating from snapshot.", self.rpki_notify); + let (file, path) = self.collector.temp_file()?; + let mut archive = RrdpArchive::create_with_file(file, path.clone())?; + if let Err(err) = SnapshotUpdate::new( + self.collector, &mut archive, notify, &mut self.metrics + ).try_update() { + if let SnapshotError::RunFailed(err) = err { + debug!("RRDP {}: snapshot update failed.", self.rpki_notify); + return Err(err) + } + else { + warn!( + "RRDP {}: failed to process snapshot file {}: {}", + self.rpki_notify, notify.content().snapshot().uri(), err + ); + return Ok(false) + } + } + + // XXX There is a possible issue here: Someone could unlink the + // temp file and replace it with something new and we will now + // copy that to the final location. + + if let Err(err) = fs::remove_file(self.path.as_ref()) { + if !matches!(err.kind(), io::ErrorKind::NotFound) { + error!( + "Fatal: Failed to delete outdated RRDP repository file \ + {}: {}", + self.path.display(), err + ); + return Err(RunFailed::fatal()) + } + } + drop(archive); + if let Err(err) = fs::rename(path.as_ref(), self.path.as_ref()) { + error!( + "Fatal: Failed to move new RRDP repository file {} to {}: {}", + path.display(), self.path.display(), err + ); + return Err(RunFailed::fatal()) + } + + debug!("RRDP {}: snapshot update completed.", self.rpki_notify); + Ok(true) + } + + /// Performs a delta update of the RRDP repository. + /// + /// Takes information of the available deltas from `notify`. May not do + /// anything at all if the repository is up-to-date. Returns whether the + /// update succeeded. If `Ok(Some(reason))` is returned, a snapshot update + /// should be tried next because of the reason given. + fn delta_update( + &mut self, + notify: &Notification, + mut archive: RrdpArchive, + state: RepositoryState, + ) -> Result, RunFailed> { + let deltas = match self.calc_deltas(notify.content(), &state) { + Ok(deltas) => deltas, + Err(reason) => return Ok(Some(reason)), + }; + + if !deltas.is_empty() { + let count = deltas.len(); + for (i, info) in deltas.iter().enumerate() { + debug!( + "RRDP {}: Delta update step ({}/{}).", + self.rpki_notify, i + 1, count + ); + if let Err(err) = DeltaUpdate::new( + self.collector, &mut archive, + notify.content().session_id(), + info, &mut self.metrics + ).try_update() { + warn!( + "RRDP {}: failed to process delta: {}", + self.rpki_notify, err, + ); + return Ok(Some(SnapshotReason::ConflictingDelta)) + } + } + } + + // We are up-to-date now, so we can replace the state file with one + // reflecting the notification we’ve got originally. This will update + // the etag and last-modified data. 
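[Editorial note: the serial bookkeeping in `calc_deltas` below reduces to finding a contiguous run of delta serials from the local serial plus one up to the remote serial; anything else forces a snapshot. A simplified model over plain serial numbers, assuming the list is sorted ascending as `sort_deltas` guarantees:]

    /// Returns the suffix of `deltas` to apply, or `None` if a snapshot is
    /// needed. `deltas` holds the serials advertised by the notification
    /// file, sorted ascending.
    fn deltas_to_apply(local: u64, remote: u64, deltas: &[u64]) -> Option<&[u64]> {
        if local == remote {
            return Some(&[]) // already up to date, nothing to apply
        }
        // The newest delta must produce exactly the remote serial.
        if deltas.last().copied() != Some(remote) {
            return None
        }
        let first_needed = local.checked_add(1)?;
        // The chain must start exactly at local + 1; a gap means the local
        // copy is too old to be brought forward by deltas.
        let start = deltas.iter().position(|&serial| serial >= first_needed)?;
        if deltas[start] != first_needed {
            return None
        }
        Some(&deltas[start..])
    }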
+ if let Err(err) = archive.update_state( + ¬ify.to_repository_state(self.collector.config.fallback_time) + ) { + if err.should_retry() { + return Ok(Some(SnapshotReason::CorruptArchive)) + } + else { + return Err(err) + } + } + + debug!("RRDP {}: Delta update completed.", self.rpki_notify); + Ok(None) + } + + /// Calculates the slice of deltas to follow for updating. + /// + /// Returns an empty slice if no update is necessary. + /// Returns a non-empty slice of the sequence of deltas to be applied. + fn calc_deltas<'b>( + &self, + notify: &'b NotificationFile, + state: &RepositoryState + ) -> Result<&'b [DeltaInfo], SnapshotReason> { + if notify.session_id() != state.session { + debug!("New session. Need to get snapshot."); + return Err(SnapshotReason::NewSession) + } + debug!("{}: Serials: us {}, them {}.", + self.rpki_notify, state.serial, notify.serial() + ); + if notify.serial() == state.serial { + return Ok(&[]); + } + + // If there is no last delta (remember, we have a different + // serial than the notification file) or if the last delta’s + // serial differs from that noted in the notification file, + // bail out. + if notify.deltas().last().map(|delta| delta.serial()) + != Some(notify.serial()) + { + debug!("Last delta serial differs from current serial."); + return Err(SnapshotReason::BadDeltaSet) + } + + let mut deltas = notify.deltas(); + let serial = match state.serial.checked_add(1) { + Some(serial) => serial, + None => return Err(SnapshotReason::LargeSerial) + }; + loop { + let first = match deltas.first() { + Some(first) => first, + None => { + debug!("Ran out of deltas."); + return Err(SnapshotReason::BadDeltaSet) + } + }; + match first.serial().cmp(&serial) { + cmp::Ordering::Greater => { + debug!("First delta is too new ({})", first.serial()); + return Err(SnapshotReason::OutdatedLocal) + } + cmp::Ordering::Equal => break, + cmp::Ordering::Less => deltas = &deltas[1..] + } + } + + if deltas.len() > self.collector.config.max_delta_count { + debug!( + "RRDP: {}: Too many delta steps required ({})", + self.rpki_notify, deltas.len() + ); + return Err(SnapshotReason::TooManyDeltas) + } + + Ok(deltas) + } + +} + diff --git a/src/collector/rrdp/http.rs b/src/collector/rrdp/http.rs new file mode 100644 index 00000000..1926f23b --- /dev/null +++ b/src/collector/rrdp/http.rs @@ -0,0 +1,486 @@ +use std::{fs, io}; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::time::Duration; +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use log::{error, warn}; +use reqwest::header; +use reqwest::{Certificate, Proxy, StatusCode}; +use reqwest::blocking::{Client, ClientBuilder, RequestBuilder, Response}; +use rpki::uri; +use crate::config::Config; +use crate::error::Fatal; +use crate::utils::date::{format_http_date, parse_http_date}; + + +//------------ HttpClient ---------------------------------------------------- + +/// The HTTP client for updating RRDP repositories. +#[derive(Debug)] +pub struct HttpClient { + /// The (blocking) reqwest client. + /// + /// This will be of the error variant until `ignite` has been called. Yes, + /// that is not ideal but + client: Result>, + + /// The base directory for storing copies of responses if that is enabled. + response_dir: Option, + + /// The timeout for requests. + timeout: Option, +} + +impl HttpClient { + /// Creates a new, not-yet-ignited client based on the config. + pub fn new(config: &Config) -> Result { + + // Deal with the reqwest’s TLS features by defining a creator + // function for the two cases. 
+ #[cfg(not(feature = "native-tls"))] + fn create_builder() -> ClientBuilder { + Client::builder().use_rustls_tls() + } + + #[cfg(feature = "native-tls")] + fn create_builder() -> ClientBuilder { + Client::builder().use_native_tls() + } + + let mut builder = create_builder(); + builder = builder.user_agent(&config.rrdp_user_agent); + builder = builder.tcp_keepalive(config.rrdp_tcp_keepalive); + builder = builder.timeout(None); // Set per request. + if let Some(timeout) = config.rrdp_connect_timeout { + builder = builder.connect_timeout(timeout); + } + if let Some(addr) = config.rrdp_local_addr { + builder = builder.local_address(addr) + } + for path in &config.rrdp_root_certs { + builder = builder.add_root_certificate( + Self::load_cert(path)? + ); + } + for proxy in &config.rrdp_proxies { + let proxy = match Proxy::all(proxy) { + Ok(proxy) => proxy, + Err(err) => { + error!( + "Invalid rrdp-proxy '{}': {}", proxy, err + ); + return Err(Fatal) + } + }; + builder = builder.proxy(proxy); + } + Ok(HttpClient { + client: Err(Some(builder)), + response_dir: config.rrdp_keep_responses.clone(), + timeout: config.rrdp_timeout, + }) + } + + /// Ignites the client. + /// + /// This _must_ be called before any other methods can be called. It must + /// be called after any potential fork on Unix systems because it spawns + /// threads. + pub fn ignite(&mut self) -> Result<(), Fatal> { + let builder = match self.client.as_mut() { + Ok(_) => return Ok(()), + Err(builder) => match builder.take() { + Some(builder) => builder, + None => { + error!("Previously failed to initialize HTTP client."); + return Err(Fatal) + } + } + }; + let client = match builder.build() { + Ok(client) => client, + Err(err) => { + error!("Failed to initialize HTTP client: {}.", err); + return Err(Fatal) + } + }; + self.client = Ok(client); + Ok(()) + } + + /// Loads a WebPKI trusted certificate. + fn load_cert(path: &Path) -> Result { + let mut file = match fs::File::open(path) { + Ok(file) => file, + Err(err) => { + error!( + "Cannot open rrdp-root-cert file '{}': {}'", + path.display(), err + ); + return Err(Fatal); + } + }; + let mut data = Vec::new(); + if let Err(err) = io::Read::read_to_end(&mut file, &mut data) { + error!( + "Cannot read rrdp-root-cert file '{}': {}'", + path.display(), err + ); + return Err(Fatal); + } + Certificate::from_pem(&data).map_err(|err| { + error!( + "Cannot decode rrdp-root-cert file '{}': {}'", + path.display(), err + ); + Fatal + }) + } + + /// Returns a reference to the reqwest client. + /// + /// # Panics + /// + /// The method panics if the client hasn’t been ignited yet. + fn client(&self) -> &Client { + self.client.as_ref().expect("HTTP client has not been ignited") + } + + /// Performs an HTTP GET request for the given URI. + /// + /// If keeping responses is enabled, the response is written to a file + /// corresponding to the URI. If the resource behind the URI changes over + /// time and this change should be tracked, set `multi` to `true` to + /// include the current time in the file name. 
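[Editorial note: the `Result<Client, Option<ClientBuilder>>` field above is a two-phase initialization: configure the builder before any fork, and only call the thread-spawning `build()` afterwards via `ignite`. The same shape generically, a sketch with hypothetical names:]

    /// Holds either a built value or the builder it will be made from.
    struct TwoPhase<B, T> {
        inner: Result<T, Option<B>>,
    }

    impl<B, T> TwoPhase<B, T> {
        fn new(builder: B) -> Self {
            TwoPhase { inner: Err(Some(builder)) }
        }

        /// Builds the value if that hasn't happened yet. Like the real
        /// `ignite`, a second call after a failed build reports an error
        /// rather than building twice.
        fn ignite(
            &mut self, build: impl FnOnce(B) -> Result<T, String>,
        ) -> Result<(), String> {
            if let Err(slot) = &mut self.inner {
                let builder = slot.take()
                    .ok_or_else(|| "previous build failed".to_string())?;
                self.inner = Ok(build(builder)?);
            }
            Ok(())
        }
    }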
+ pub fn response( + &self, + uri: &uri::Https, + multi: bool, + ) -> Result { + self._response(uri, self.client().get(uri.as_str()), multi) + } + + pub fn conditional_response( + &self, + uri: &uri::Https, + etag: Option<&Bytes>, + last_modified: Option>, + multi: bool, + ) -> Result { + let mut request = self.client().get(uri.as_str()); + if let Some(etag) = etag { + request = request.header( + header::IF_NONE_MATCH, etag.as_ref() + ); + } + if let Some(last_modified) = last_modified { + request = request.header( + header::IF_MODIFIED_SINCE, + format_http_date(last_modified) + ); + } + self._response(uri, request, multi) + } + + /// Creates a response from a request builder. + fn _response( + &self, + uri: &uri::Https, + mut request: RequestBuilder, + multi: bool + ) -> Result { + if let Some(timeout) = self.timeout { + request = request.timeout(timeout); + } + request.send().and_then(|response| { + response.error_for_status() + }).map(|response| { + HttpResponse::create(response, uri, &self.response_dir, multi) + }) + } + + /* + /// Requests, parses, and returns the given RRDP notification file. + /// + /// The value referred to by `status` will be updated to the received + /// status code or `HttpStatus::Error` if the request failed. + /// + /// Returns the notification file on success. + pub fn notification_file( + &self, + uri: &uri::Https, + state: Option<&RepositoryState>, + status: &mut HttpStatus, + ) -> Result, Failed> { + let mut request = self.client().get(uri.as_str()); + if let Some(state) = state { + if let Some(etag) = state.etag.as_ref() { + request = request.header( + header::IF_NONE_MATCH, etag.as_ref() + ); + } + if let Some(ts) = state.last_modified_ts { + if let Some(datetime) = Utc.timestamp_opt(ts, 0).single() { + request = request.header( + header::IF_MODIFIED_SINCE, + format_http_date(datetime) + ); + } + } + } + let response = match self._response(uri, request, true) { + Ok(response) => { + *status = response.status().into(); + response + } + Err(err) => { + warn!("RRDP {}: {}", uri, err); + *status = HttpStatus::Error; + return Err(Failed) + } + }; + + if response.status() == StatusCode::NOT_MODIFIED { + Ok(None) + } + else if response.status() != StatusCode::OK { + warn!( + "RRDP {}: Getting notification file failed with status {}", + uri, response.status() + ); + Err(Failed) + } + else { + Notification::from_response(uri, response).map(Some) + } + } + */ +} + + +//------------ HttpResponse -------------------------------------------------- + +/// Wraps a reqwest response for added features. +pub struct HttpResponse { + /// The wrapped reqwest response. + response: Response, + + /// A file to also store read data into. + file: Option, +} + +impl HttpResponse { + /// Creates a new response wrapping a reqwest reponse. + /// + /// If `response_dir` is some path, the response will also be written to + /// a file under this directory based on `uri`. Each URI component + /// starting with the authority will be a directory name. If `multi` is + /// `false` the last component will be the file name. If `multi` is + /// `true` the last component will be a directory, too, and the file name + /// will be the ISO timestamp of the current time. + pub fn create( + response: Response, + uri: &uri::Https, + response_dir: &Option, + multi: bool + ) -> Self { + HttpResponse { + response, + file: response_dir.as_ref().and_then(|base| { + Self::open_file(base, uri, multi) + }) + } + } + + /// Opens the file mirroring file. + /// + /// See [`create`][Self::create] for the rules. 
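[Editorial note: stripped of the URI and date plumbing, the conditional request assembled by `conditional_response` above is plain HTTP revalidation via reqwest's blocking API. A sketch:]

    use reqwest::header;
    use reqwest::blocking::{Client, Response};

    fn conditional_get(
        client: &Client,
        url: &str,
        etag: Option<&str>,
        last_modified: Option<&str>,
    ) -> reqwest::Result<Response> {
        let mut request = client.get(url);
        if let Some(etag) = etag {
            // The stored ETag is sent verbatim; it already carries its
            // quotes (and a possible `W/` prefix).
            request = request.header(header::IF_NONE_MATCH, etag);
        }
        if let Some(date) = last_modified {
            // `date` is an HTTP-date string, as produced by
            // `format_http_date` in the code above.
            request = request.header(header::IF_MODIFIED_SINCE, date);
        }
        request.send()
    }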
+ fn open_file( + base: &Path, uri: &uri::Https, multi: bool + ) -> Option { + let path = base.join(&uri.as_str()[8..]); + let path = if multi { + path.join(Utc::now().to_rfc3339()) + } + else { + path + }; + + let parent = match path.parent() { + Some(parent) => parent, + None => { + warn!( + "Cannot keep HTTP response; \ + URI translated into a bad path '{}'", + path.display() + ); + return None + } + }; + if let Err(err) = fs::create_dir_all(parent) { + warn!( + "Cannot keep HTTP response; \ + creating directory {} failed: {}", + parent.display(), err + ); + return None + } + match fs::File::create(&path) { + Ok(file) => Some(file), + Err(err) => { + warn!( + "Cannot keep HTTP response; \ + creating file {} failed: {}", + path.display(), err + ); + None + } + } + } + + /// Returns the value of the content length header if present. + pub fn content_length(&self) -> Option { + self.response.content_length() + } + + /// Copies the full content of the response to the given writer. + pub fn copy_to( + &mut self, w: &mut W + ) -> Result { + // We cannot use the reqwest response’s `copy_to` impl because we need + // to use our own `io::Read` impl which sneaks in the copying to file + // if necessary. + io::copy(self, w) + } + + /// Returns the status code of the response. + pub fn status(&self) -> StatusCode { + self.response.status() + } + + /// Returns the value of the ETag header if present. + /// + /// The returned value is the complete content. That is, it includes the + /// quotation marks and a possible `W/` prefix. + /// + /// The method quietly returns `None` if the content of a header is + /// malformed or if there is more than one occurence of the header. + /// + /// The method returns a `Bytes` value as there is a good chance the + /// tag is short enough to be be inlined. + pub fn etag(&self) -> Option { + let mut etags = self.response.headers() + .get_all(header::ETAG) + .into_iter(); + let etag = etags.next()?; + if etags.next().is_some() { + return None + } + Self::parse_etag(etag.as_bytes()) + } + + /// Parses the ETag value. + /// + /// This is a separate function to make testing easier. + fn parse_etag(etag: &[u8]) -> Option { + // The tag starts with an optional case-sensitive `W/` followed by + // `"`. Let’s remember where the actual tag starts. + let start = if etag.starts_with(b"W/\"") { + 3 + } + else if etag.first() == Some(&b'"') { + 1 + } + else { + return None + }; + + // We need at least one more character. Empty tags are allowed. + if etag.len() <= start { + return None + } + + // The tag ends with a `"`. + if etag.last() != Some(&b'"') { + return None + } + + Some(Bytes::copy_from_slice(etag)) + } + + /// Returns the value of the Last-Modified header if present. + /// + /// The method quietly returns `None` if the content of a header is + /// malformed or if there is more than one occurence of the header. + pub fn last_modified(&self) -> Option> { + let mut iter = self.response.headers() + .get_all(header::LAST_MODIFIED) + .into_iter(); + let value = iter.next()?; + if iter.next().is_some() { + return None + } + parse_http_date(value.to_str().ok()?) + } +} + + +//--- Read + +impl io::Read for HttpResponse { + fn read(&mut self, buf: &mut [u8]) -> Result { + let res = self.response.read(buf)?; + if let Some(file) = self.file.as_mut() { + file.write_all(&buf[..res])?; + } + Ok(res) + } +} + + +//------------ HttpStatus ---------------------------------------------------- + +/// The result of an HTTP request. 
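[Editorial note: the ETag grammar accepted by `parse_etag` above, restated as a standalone predicate. Weak validators keep their `W/` prefix, quotes are required, and the empty tag `""` is allowed:]

    fn etag_is_wellformed(etag: &[u8]) -> bool {
        // Optional case-sensitive weak prefix, then an opening quote ...
        let start = if etag.starts_with(b"W/\"") { 3 }
            else if etag.first() == Some(&b'"') { 1 }
            else { return false };
        // ... and a closing quote after it.
        etag.len() > start && etag.last() == Some(&b'"')
    }

    // etag_is_wellformed(b"\"xyzzy\"")   == true
    // etag_is_wellformed(b"W/\"xyzzy\"") == true
    // etag_is_wellformed(b"xyzzy")       == false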
+#[derive(Clone, Copy, Debug)] +pub enum HttpStatus { + /// A response was received with the given status code. + Response(StatusCode), + + /// The repository URI was rejected. + Rejected, + + /// An error happened. + Error +} + +impl HttpStatus { + pub fn into_i16(self) -> i16 { + match self { + HttpStatus::Response(code) => code.as_u16() as i16, + HttpStatus::Rejected => -2, + HttpStatus::Error => -1, + } + } + + pub fn is_not_modified(self) -> bool { + matches!( + self, + HttpStatus::Response(code) if code == StatusCode::NOT_MODIFIED + ) + } + + pub fn is_success(self) -> bool { + matches!( + self, + HttpStatus::Response(code) if code.is_success() + ) + } +} + +impl From for HttpStatus { + fn from(code: StatusCode) -> Self { + HttpStatus::Response(code) + } +} + diff --git a/src/collector/rrdp/mod.rs b/src/collector/rrdp/mod.rs new file mode 100644 index 00000000..56c6b0a4 --- /dev/null +++ b/src/collector/rrdp/mod.rs @@ -0,0 +1,11 @@ +#![allow(dead_code)] + +pub use self::base::{Collector, LoadResult, ReadRepository, Run}; +pub use self::http::HttpStatus; +pub use self::update::SnapshotReason; + +mod archive; +mod base; +mod http; +mod update; + diff --git a/src/collector/rrdp/update.rs b/src/collector/rrdp/update.rs new file mode 100644 index 00000000..8b641ae1 --- /dev/null +++ b/src/collector/rrdp/update.rs @@ -0,0 +1,871 @@ + +use std::{error, fmt, io}; +use std::collections::HashSet; +use std::io::Read; +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use log::{error, warn}; +use reqwest::StatusCode; +use ring::digest; +use ring::constant_time::verify_slices_are_equal; +use rpki::{rrdp, uri}; +use rpki::rrdp::{DeltaInfo, NotificationFile, ProcessDelta, ProcessSnapshot}; +use uuid::Uuid; +use crate::error::{Failed, RunFailed}; +use crate::metrics::RrdpRepositoryMetrics; +use crate::utils::archive::{ArchiveError, PublishError}; +use super::archive::{AccessError, FallbackTime, RepositoryState, RrdpArchive}; +use super::base::Collector; +use super::http::{HttpClient, HttpResponse, HttpStatus}; + + +//------------ Notification -------------------------------------------------- + +/// The notification file of an RRDP repository. +pub struct Notification { + /// The URI of the notification file. + uri: uri::Https, + + /// The content of the file. + content: NotificationFile, + + /// The Etag value if provided. + etag: Option, + + /// The Last-Modified value if provided, + last_modified: Option>, +} + +impl Notification { + /// Requests, parses, and returns the given RRDP notification file. + /// + /// The value referred to by `status` will be updated to the received + /// status code or `HttpStatus::Error` if the request failed. + /// + /// Returns the notification file on success. Returns `Ok(None)` if a + /// response was received successfully but indicated that the + /// notification file was not updated. 
+ pub fn get( + http: &HttpClient, + uri: &uri::Https, + state: Option<&RepositoryState>, + status: &mut HttpStatus, + ) -> Result, Failed> { + let response = match http.conditional_response( + uri, + state.and_then(|state| state.etag.as_ref()), + state.and_then(|state| state.last_modified()), + true + ) { + Ok(response) => { + *status = response.status().into(); + response + } + Err(err) => { + warn!("RRDP {}: {}", uri, err); + *status = HttpStatus::Error; + return Err(Failed) + } + }; + + if response.status() == StatusCode::NOT_MODIFIED { + Ok(None) + } + else if response.status() != StatusCode::OK { + warn!( + "RRDP {}: Getting notification file failed with status {}", + uri, response.status() + ); + Err(Failed) + } + else { + Notification::from_response(uri.clone(), response).map(Some) + } + } + + + /// Creates a new notification from a successful HTTP response. + /// + /// Assumes that the response status was 200 OK. + fn from_response( + uri: uri::Https, response: HttpResponse + ) -> Result { + let etag = response.etag(); + let last_modified = response.last_modified(); + let mut content = NotificationFile::parse( + io::BufReader::new(response) + ).map_err(|err| { + warn!("RRDP {}: {}", uri, err); + Failed + })?; + content.sort_deltas(); + Ok(Notification { uri, content, etag, last_modified }) + } + + /// Returns a reference to the content of the notification file. + pub fn content(&self) -> &NotificationFile { + &self.content + } + + /// Creates repository state for this notification. + pub fn to_repository_state( + &self, fallback: FallbackTime, + ) -> RepositoryState { + RepositoryState { + rpki_notify: self.uri.clone(), + session: self.content.session_id(), + serial: self.content.serial(), + updated_ts: Utc::now().timestamp(), + best_before_ts: fallback.best_before().timestamp(), + last_modified_ts: self.last_modified.map(|x| x.timestamp()), + etag: self.etag.clone(), + } + } +} + + +//------------ SnapshotUpdate ------------------------------------------------ + +/// An update to a repository performed from a snapshot file. +/// +/// For this type of update, we collect all the published objects in the +/// repository’s temp directory and move it over to the object directory upon +/// success. +pub struct SnapshotUpdate<'a> { + /// The collector. + collector: &'a Collector, + + /// The archive to store the snapshot into. + archive: &'a mut RrdpArchive, + + /// The notification file pointing to the snapshot. + notify: &'a Notification, + + /// The metrics for the update. 
+ metrics: &'a mut RrdpRepositoryMetrics, +} + +impl<'a> SnapshotUpdate<'a> { + pub fn new( + collector: &'a Collector, + archive: &'a mut RrdpArchive, + notify: &'a Notification, + metrics: &'a mut RrdpRepositoryMetrics, + ) -> Self { + SnapshotUpdate { collector, archive, notify, metrics } + } + + pub fn try_update(mut self) -> Result<(), SnapshotError> { + let response = match self.collector.http().response( + self.notify.content.snapshot().uri(), false + ) { + Ok(response) => { + self.metrics.payload_status = Some(response.status().into()); + if response.status() != StatusCode::OK { + return Err(response.status().into()) + } + else { + response + } + } + Err(err) => { + self.metrics.payload_status = Some(HttpStatus::Error); + return Err(err.into()) + } + }; + + let mut reader = io::BufReader::new(HashRead::new(response)); + self.process(&mut reader)?; + let hash = reader.into_inner().into_hash(); + if verify_slices_are_equal( + hash.as_ref(), + self.notify.content.snapshot().hash().as_ref() + ).is_err() { + return Err(SnapshotError::HashMismatch) + } + self.archive.publish_state( + &self.notify.to_repository_state( + self.collector.config().fallback_time + ) + )?; + Ok(()) + } +} + +impl<'a> ProcessSnapshot for SnapshotUpdate<'a> { + type Err = SnapshotError; + + fn meta( + &mut self, + session_id: Uuid, + serial: u64, + ) -> Result<(), Self::Err> { + if session_id != self.notify.content.session_id() { + return Err(SnapshotError::SessionMismatch { + expected: self.notify.content.session_id(), + received: session_id + }) + } + if serial != self.notify.content.serial() { + return Err(SnapshotError::SerialMismatch { + expected: self.notify.content.serial(), + received: serial + }) + } + Ok(()) + } + + fn publish( + &mut self, + uri: uri::Rsync, + data: &mut rrdp::ObjectReader, + ) -> Result<(), Self::Err> { + let content = RrdpDataRead::new( + data, &uri, self.collector.config().max_object_size, + ).read_all()?; + self.archive.publish_object(&uri, &content).map_err(|err| match err { + PublishError::AlreadyExists => { + SnapshotError::DuplicateObject(uri.clone()) + } + PublishError::Archive(ArchiveError::Corrupt) => { + warn!( + "Temporary RRDP repository file {} became corrupt.", + self.archive.path().display(), + ); + SnapshotError::RunFailed(RunFailed::retry()) + } + PublishError::Archive(ArchiveError::Io(err)) => { + error!( + "Fatal: Failed to write to temporary RRDP repository file \ + {}: {}", + self.archive.path().display(), err, + ); + SnapshotError::RunFailed(RunFailed::fatal()) + } + }) + } +} + + +//------------ DeltaUpdate --------------------------------------------------- + +/// An update to a repository performed from a delta file. +/// +/// For this kind of update, we collect newly published and updated objects in +/// the repository’s temp directory and remember them as well as all deleted +/// objects and if everything is okay, copy files over to and delete files in +/// the object directory. +pub struct DeltaUpdate<'a> { + /// The collector. + collector: &'a Collector, + + /// The archive the repository is stored in. + archive: &'a mut RrdpArchive, + + /// The session ID of the RRDP session. + session_id: Uuid, + + /// Information about the delta file. + info: &'a DeltaInfo, + + /// The metrics for the update. + metrics: &'a mut RrdpRepositoryMetrics, + + /// The URIs we’ve already seen in this delta. + /// + /// This is so we can error out if a URI was touched more than once. + seen: HashSet, +} + +impl<'a> DeltaUpdate<'a> { + /// Creates a new delta update. 
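[Editorial note: both update types verify downloads the same way — wrap the HTTP body in `HashRead`, parse the XML through it, then compare digests in constant time. The digest side of that in isolation, using `ring` as the code above does:]

    use std::io::{self, Read};
    use ring::digest;

    /// Hashes everything read from `src` without buffering it all in memory.
    fn sha256_of(mut src: impl Read) -> io::Result<digest::Digest> {
        let mut context = digest::Context::new(&digest::SHA256);
        let mut buf = [0u8; 8192];
        loop {
            let n = src.read(&mut buf)?;
            if n == 0 {
                break
            }
            context.update(&buf[..n]);
        }
        Ok(context.finish())
    }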
+ pub fn new( + collector: &'a Collector, + archive: &'a mut RrdpArchive, + session_id: Uuid, + info: &'a DeltaInfo, + metrics: &'a mut RrdpRepositoryMetrics, + ) -> Self { + DeltaUpdate { + collector, archive, session_id, info, metrics, + seen: Default::default(), + } + } + + pub fn try_update(mut self) -> Result<(), DeltaError> { + let response = match self.collector.http().response( + self.info.uri(), false + ) { + Ok(response) => { + self.metrics.payload_status = Some(response.status().into()); + if response.status() != StatusCode::OK { + return Err(response.status().into()) + } + else { + response + } + } + Err(err) => { + self.metrics.payload_status = Some(HttpStatus::Error); + return Err(err.into()) + } + }; + + let mut reader = io::BufReader::new(HashRead::new(response)); + self.process(&mut reader)?; + let hash = reader.into_inner().into_hash(); + if verify_slices_are_equal( + hash.as_ref(), + self.info.hash().as_ref() + ).is_err() { + return Err(DeltaError::DeltaHashMismatch) + } + Ok(()) + } +} + +impl<'a> ProcessDelta for DeltaUpdate<'a> { + type Err = DeltaError; + + fn meta( + &mut self, session_id: Uuid, serial: u64 + ) -> Result<(), Self::Err> { + if session_id != self.session_id { + return Err(DeltaError::SessionMismatch { + expected: self.session_id, + received: session_id + }) + } + if serial != self.info.serial() { + return Err(DeltaError::SerialMismatch { + expected: self.info.serial(), + received: serial + }) + } + Ok(()) + } + + fn publish( + &mut self, + uri: uri::Rsync, + hash: Option, + data: &mut rrdp::ObjectReader<'_> + ) -> Result<(), Self::Err> { + if !self.seen.insert(uri.clone()) { + return Err(DeltaError::ObjectRepeated { uri }) + } + let content = RrdpDataRead::new( + data, &uri, self.collector.config().max_object_size + ).read_all()?; + match hash { + Some(hash) => { + self.archive.update_object( + &uri, hash, &content + ).map_err(|err| match err { + AccessError::NotFound => { + DeltaError::MissingObject { uri: uri.clone() } + } + AccessError::HashMismatch => { + DeltaError::ObjectHashMismatch { uri: uri.clone() } + } + AccessError::Archive(err) => DeltaError::Archive(err), + }) + } + None => { + self.archive.publish_object(&uri, &content).map_err(|err| { + match err { + PublishError::AlreadyExists => { + DeltaError::ObjectAlreadyPresent { + uri: uri.clone() + } + } + PublishError::Archive(err) => { + DeltaError::Archive(err) + } + } + }) + } + } + } + + fn withdraw( + &mut self, + uri: uri::Rsync, + hash: rrdp::Hash + ) -> Result<(), Self::Err> { + if !self.seen.insert(uri.clone()) { + return Err(DeltaError::ObjectRepeated { uri }) + } + self.archive.delete_object(&uri, hash).map_err(|err| match err { + AccessError::NotFound => { + DeltaError::MissingObject { uri: uri.clone() } + } + AccessError::HashMismatch => { + DeltaError::ObjectHashMismatch { uri: uri.clone() } + } + AccessError::Archive(err) => DeltaError::Archive(err), + }) + } +} + + +//------------ HashRead ------------------------------------------------------ + +/// A reader wrapper that calculates the SHA-256 hash of all read data. +struct HashRead { + /// The wrapped reader. + reader: R, + + /// The context for hash calculation. + context: digest::Context, +} + +impl HashRead { + /// Creates a new hash reader. + pub fn new(reader: R) -> Self { + HashRead { + reader, + context: digest::Context::new(&digest::SHA256) + } + } + + /// Converts the reader into the hash. + pub fn into_hash(self) -> rrdp::Hash { + // Unwrap should be safe: This can only fail if the slice has the + // wrong length. 
+ rrdp::Hash::try_from(self.context.finish()).unwrap() + } +} + + +impl io::Read for HashRead { + fn read(&mut self, buf: &mut [u8]) -> Result { + let res = self.reader.read(buf)?; + self.context.update(&buf[..res]); + Ok(res) + } +} + + +//------------ RrdpDataRead -------------------------------------------------- + +/// A reader that reads the data of objects in a snapshot or delta. +/// +/// The type ensures the size limit of objects and allows treating read errors +/// differently than write errors by storing any error and making it available +/// after the fact. +struct RrdpDataRead<'a, R> { + /// The wrapped reader. + reader: R, + + /// The URI of the object we are reading. + uri: &'a uri::Rsync, + + /// The number of bytes left to read. + /// + /// If this is `None` we are allowed to read an unlimited amount. + left: Option, + + /// The last error that happend. + err: Option, +} + +impl<'a, R> RrdpDataRead<'a, R> { + /// Creates a new read from necessary information. + /// + /// The returned value will wrap `reader`. The `uri` should be the rsync + /// URI of the published object. It is only used for generating meaningful + /// error messages. If `max_size` is some value, the size of the object + /// will be limited to that value in bytes. Larger objects lead to an + /// error. + pub fn new(reader: R, uri: &'a uri::Rsync, max_size: Option) -> Self { + RrdpDataRead { reader, uri, left: max_size, err: None } + } + + /// Returns a stored error if available. + /// + /// If it returns some error, that error happened during reading before + /// an `io::Error` was returned. + /// + /// The method takes the stored error and replaces it internally with + /// `None`. + pub fn take_err(&mut self) -> Option { + self.err.take() + } +} + +impl<'a, R: io::Read> RrdpDataRead<'a, R> { + /// Reads the data into a vec. + pub fn read_all(mut self) -> Result, RrdpDataReadError> { + let mut content = Vec::new(); + if let Err(io_err) = self.read_to_end(&mut content) { + return Err( + match self.take_err() { + Some(data_err) => data_err, + None => RrdpDataReadError::Read(io_err), + } + ) + } + Ok(content) + } +} + +impl<'a, R: io::Read> io::Read for RrdpDataRead<'a, R> { + fn read(&mut self, buf: &mut [u8]) -> Result { + let res = match self.reader.read(buf) { + Ok(res) => res, + Err(err) => { + self.err = Some(RrdpDataReadError::Read(err)); + return Err(io::Error::new( + io::ErrorKind::Other, "reading data failed", + )) + } + }; + if let Some(left) = self.left { + let res64 = match u64::try_from(res) { + Ok(res) => res, + Err(_) => { + // If the usize doesn’t fit into a u64, things are + // definitely way too big. + self.left = Some(0); + self.err = Some( + RrdpDataReadError::LargeObject(self.uri.clone()) + ); + return Err(io::Error::new( + io::ErrorKind::Other, "size limit exceeded" + )) + } + }; + if res64 > left { + self.left = Some(0); + self.err = Some( + RrdpDataReadError::LargeObject(self.uri.clone()) + ); + Err(io::Error::new( + io::ErrorKind::Other, "size limit exceeded") + ) + } + else { + self.left = Some(left - res64); + Ok(res) + } + } + else { + Ok(res) + } + } +} + + +//------------ SnapshotReason ------------------------------------------------ + +/// The reason why a snapshot was used. +#[derive(Clone, Copy, Debug)] +pub enum SnapshotReason { + /// The respository is new. + NewRepository, + + /// A new session was encountered. + NewSession, + + /// The delta set in the notification file is inconsistent. + BadDeltaSet, + + /// A larger-than-supported serial number was encountered. 
+ LargeSerial, + + /// The local copy is outdated and cannot be updated via deltas. + OutdatedLocal, + + /// A delta file was conflicting with locally stored data. + ConflictingDelta, + + /// There were too many deltas to process. + TooManyDeltas, + + /// The local copy was corrupt. + CorruptArchive, +} + +impl SnapshotReason { + /// Returns a shorthand code for the reason. + pub fn code(self) -> &'static str { + use SnapshotReason::*; + + match self { + NewRepository => "new-repository", + NewSession => "new-session", + BadDeltaSet => "inconsistent-delta-set", + LargeSerial => "large-serial", + OutdatedLocal => "outdate-local", + ConflictingDelta => "conflicting-delta", + TooManyDeltas => "too-many-deltas", + CorruptArchive => "corrupt-local-copy", + } + } +} + + +//============ Errors ======================================================== + +//------------ RrdpDataReadError --------------------------------------------- + +/// An error happened while reading object data. +/// +/// This covers both the case where the maximum allowed file size was +/// exhausted as well as where reading data failed. Neither of them is fatal, +/// so we need to process them separately. +#[derive(Debug)] +enum RrdpDataReadError { + LargeObject(uri::Rsync), + Read(io::Error), +} + + +//------------ SnapshotError ------------------------------------------------- + +/// An error happened during snapshot processing. +/// +/// This is an internal error type only necessary for error handling during +/// RRDP processing. Values will be logged and converted into failures or +/// negative results as necessary. +#[derive(Debug)] +pub enum SnapshotError { + Http(reqwest::Error), + HttpStatus(StatusCode), + Rrdp(rrdp::ProcessError), + SessionMismatch { + expected: Uuid, + received: Uuid + }, + SerialMismatch { + expected: u64, + received: u64, + }, + DuplicateObject(uri::Rsync), + HashMismatch, + LargeObject(uri::Rsync), + RunFailed(RunFailed), +} + +impl From for SnapshotError { + fn from(err: reqwest::Error) -> Self { + SnapshotError::Http(err) + } +} + +impl From for SnapshotError { + fn from(code: StatusCode) -> Self { + SnapshotError::HttpStatus(code) + } +} + +impl From for SnapshotError { + fn from(err: rrdp::ProcessError) -> Self { + SnapshotError::Rrdp(err) + } +} + +impl From for SnapshotError { + fn from(err: io::Error) -> Self { + SnapshotError::Rrdp(err.into()) + } +} + +impl From for SnapshotError { + fn from(err: RunFailed) -> Self { + SnapshotError::RunFailed(err) + } +} + +impl From for SnapshotError { + fn from(err: RrdpDataReadError) -> Self { + match err { + RrdpDataReadError::LargeObject(uri) => { + SnapshotError::LargeObject(uri) + } + RrdpDataReadError::Read(err) => { + SnapshotError::Rrdp(err.into()) + } + } + } +} + +impl fmt::Display for SnapshotError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + SnapshotError::Http(ref err) => err.fmt(f), + SnapshotError::HttpStatus(status) => { + write!(f, "HTTP {}", status) + } + SnapshotError::Rrdp(ref err) => err.fmt(f), + SnapshotError::SessionMismatch { ref expected, ref received } => { + write!( + f, + "session ID mismatch (notification_file: {}, \ + snapshot file: {}", + expected, received + ) + } + SnapshotError::SerialMismatch { ref expected, ref received } => { + write!( + f, + "serial number mismatch (notification_file: {}, \ + snapshot file: {}", + expected, received + ) + } + SnapshotError::DuplicateObject(ref uri) => { + write!(f, "duplicate object: {}", uri) + } + SnapshotError::HashMismatch => { + write!(f, 
"hash value mismatch") + } + SnapshotError::LargeObject(ref uri) => { + write!(f, "object exceeds size limit: {}", uri) + } + SnapshotError::RunFailed(_) => Ok(()), + } + } +} + +impl error::Error for SnapshotError { } + + +//------------ DeltaError ---------------------------------------------------- + +/// An error happened during delta processing. +/// +/// This is an internal error type only necessary for error handling during +/// RRDP processing. Values will be logged and converted into failures or +/// negative results as necessary. +#[derive(Debug)] +pub enum DeltaError { + Http(reqwest::Error), + HttpStatus(StatusCode), + Rrdp(rrdp::ProcessError), + SessionMismatch { + expected: Uuid, + received: Uuid + }, + SerialMismatch { + expected: u64, + received: u64, + }, + MissingObject { + uri: uri::Rsync, + }, + ObjectAlreadyPresent { + uri: uri::Rsync, + }, + ObjectHashMismatch { + uri: uri::Rsync, + }, + ObjectRepeated { + uri: uri::Rsync, + }, + DeltaHashMismatch, + LargeObject(uri::Rsync), + Archive(ArchiveError), +} + +impl From for DeltaError { + fn from(err: reqwest::Error) -> Self { + DeltaError::Http(err) + } +} + +impl From for DeltaError { + fn from(code: StatusCode) -> Self { + DeltaError::HttpStatus(code) + } +} + +impl From for DeltaError { + fn from(err: rrdp::ProcessError) -> Self { + DeltaError::Rrdp(err) + } +} + +impl From for DeltaError { + fn from(err: io::Error) -> Self { + DeltaError::Rrdp(err.into()) + } +} + +impl From for DeltaError { + fn from(err: RrdpDataReadError) -> Self { + match err { + RrdpDataReadError::LargeObject(uri) => { + DeltaError::LargeObject(uri) + } + RrdpDataReadError::Read(err) => { + DeltaError::Rrdp(err.into()) + } + } + } +} + +impl fmt::Display for DeltaError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + DeltaError::Http(ref err) => err.fmt(f), + DeltaError::HttpStatus(status) => { + write!(f, "HTTP {}", status) + } + DeltaError::Rrdp(ref err) => err.fmt(f), + DeltaError::SessionMismatch { ref expected, ref received } => { + write!( + f, + "session ID mismatch (notification_file: {}, \ + snapshot file: {}", + expected, received + ) + } + DeltaError::SerialMismatch { ref expected, ref received } => { + write!( + f, + "serial number mismatch (notification_file: {}, \ + snapshot file: {}", + expected, received + ) + } + DeltaError::MissingObject { ref uri } => { + write!( + f, + "reference to missing object {}", + uri + ) + } + DeltaError::ObjectAlreadyPresent { ref uri } => { + write!( + f, + "attempt to add already present object {}", + uri + ) + } + DeltaError::ObjectHashMismatch { ref uri } => { + write!( + f, + "local object {} has different hash", + uri + ) + } + DeltaError::ObjectRepeated { ref uri } => { + write!(f, "object appears multiple times: {}", uri) + } + DeltaError::LargeObject(ref uri) => { + write!(f, "object exceeds size limit: {}", uri) + } + DeltaError::DeltaHashMismatch => { + write!(f, "delta file hash value mismatch") + } + DeltaError::Archive(ref err) => { + write!(f, "archive error: {}", err) + } + } + } +} + +impl error::Error for DeltaError { } + diff --git a/src/collector/rsync.rs b/src/collector/rsync.rs index 2fe39bc7..3a7a3420 100644 --- a/src/collector/rsync.rs +++ b/src/collector/rsync.rs @@ -28,7 +28,7 @@ use log::{debug, error, info, warn}; use rpki::uri; use tokio::process::Command as AsyncCommand; use crate::config::Config; -use crate::error::Failed; +use crate::error::{Failed, Fatal}; use crate::metrics::{Metrics, RsyncModuleMetrics}; use crate::utils::fatal; use 
crate::utils::sync::{Mutex, RwLock}; @@ -114,6 +114,13 @@ impl Collector { Ok(()) } + /// Sanitizes the stored data. + /// + /// Currently doesn’t do anything. + pub fn sanitize(&self) -> Result<(), Fatal> { + Ok(()) + } + /// Start a validation run on the collector. pub fn start(&self) -> Run { Run::new(self) diff --git a/src/engine.rs b/src/engine.rs index 23f1a7c1..9d4aba91 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -43,7 +43,7 @@ use rpki::uri; use crate::{collector, store, tals}; use crate::config::{Config, FilterPolicy}; use crate::collector::Collector; -use crate::error::Failed; +use crate::error::{Failed, Fatal, RunFailed}; use crate::metrics::{ Metrics, PublicationMetrics, RepositoryMetrics, TalMetrics }; @@ -276,6 +276,18 @@ impl Engine { Ok(()) } + /// Sanitizes the stored data. + /// + /// This goes over the stored data and deletes what looks broken. It + /// should be called before retrying a failed restartable run. + pub fn sanitize(&self) -> Result<(), Fatal> { + self.store.sanitize()?; + if let Some(collector) = self.collector.as_ref() { + collector.sanitize()?; + } + Ok(()) + } + /// Starts a validation run. /// /// During the run, `processor` will be responsible for dealing with @@ -329,6 +341,12 @@ pub struct Run<'a, P> { /// The processor for valid data. processor: P, + /// Was an error encountered during the run? + had_err: AtomicBool, + + /// Was a fatal error encountered during the run? + is_fatal: AtomicBool, + /// The metrics collected during the run. metrics: Metrics, } @@ -343,6 +361,8 @@ impl<'a, P> Run<'a, P> { ) -> Self { Run { validation, collector, store, processor, + had_err: AtomicBool::new(false), + is_fatal: AtomicBool::new(false), metrics: Default::default() } } @@ -378,7 +398,7 @@ impl<'a, P> Run<'a, P> { impl<'a, P: ProcessRun> Run<'a, P> { /// Performs the validation run. - pub fn process(&mut self) -> Result<(), Failed> { + pub fn process(&mut self) -> Result<(), RunFailed> { // If we don’t have any TALs, we ain’t got nothing to do. if self.validation.tals.is_empty() { return Ok(()) @@ -395,7 +415,6 @@ impl<'a, P: ProcessRun> Run<'a, P> { // And off we trot. // Keep a flag to cancel everything if something goes wrong. 
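[Editorial note: the engine's `Run` now distinguishes "a worker failed" from "a worker failed fatally" with two relaxed atomics, which the coordinating thread folds into a single `RunFailed`. The same bookkeeping in isolation:]

    use std::sync::atomic::{AtomicBool, Ordering};

    #[derive(Default)]
    struct FailFlags {
        had_err: AtomicBool,
        is_fatal: AtomicBool,
    }

    impl FailFlags {
        /// Called by any worker thread that hits an error.
        fn record(&self, fatal: bool) {
            self.had_err.store(true, Ordering::Relaxed);
            if fatal {
                self.is_fatal.store(true, Ordering::Relaxed);
            }
        }

        /// Called once all workers are done: `None` means success,
        /// `Some(true)` a fatal failure, `Some(false)` a retryable one.
        fn outcome(&self) -> Option<bool> {
            self.had_err.load(Ordering::Relaxed)
                .then(|| self.is_fatal.load(Ordering::Relaxed))
        }
    }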
- let had_err = AtomicBool::new(false); let thread_metrics = ArrayQueue::new( self.validation.validation_threads ); @@ -405,7 +424,7 @@ impl<'a, P: ProcessRun> Run<'a, P> { let mut metrics = metrics.fork(); while let Some(task) = tasks.pop() { if self.process_task( - task, &tasks, &mut metrics, &had_err, + task, &tasks, &mut metrics, ).is_err() { break; } @@ -415,8 +434,13 @@ impl<'a, P: ProcessRun> Run<'a, P> { } }); - if had_err.load(Ordering::Relaxed) { - return Err(Failed); + if self.had_err.load(Ordering::Relaxed) { + if self.is_fatal.load(Ordering::Relaxed) { + return Err(RunFailed::fatal()) + } + else { + return Err(RunFailed::retry()) + } } metrics.prepare_final(&mut self.metrics); @@ -433,14 +457,13 @@ impl<'a, P: ProcessRun> Run<'a, P> { task: Task, tasks: &SegQueue>, metrics: &mut RunMetrics, - had_err: &AtomicBool, ) -> Result<(), Failed> { match task { Task::Tal(task) => { - self.process_tal_task(task, tasks, metrics, had_err) + self.process_tal_task(task, tasks, metrics) } Task::Ca(task) => { - self.process_ca_task(task, tasks, metrics, had_err) + self.process_ca_task(task, tasks, metrics) } } } @@ -450,7 +473,6 @@ impl<'a, P: ProcessRun> Run<'a, P> { &self, task: TalTask, tasks: &SegQueue>, metrics: &mut RunMetrics, - had_err: &AtomicBool, ) -> Result<(), Failed> { for uri in task.tal.uris() { let cert = match self.load_ta(uri, task.tal.info())? { @@ -489,7 +511,7 @@ impl<'a, P: ProcessRun> Run<'a, P> { repository_index: None, defer: false, }, - tasks, metrics, had_err + tasks, metrics, ) } None => { @@ -533,29 +555,36 @@ impl<'a, P: ProcessRun> Run<'a, P> { task: CaTask, tasks: &SegQueue>, metrics: &mut RunMetrics, - had_err: &AtomicBool, ) -> Result<(), Failed> { let more_tasks = PubPoint::new( self, &task.cert, task.processor, task.repository_index, ).and_then(|point| { point.process(metrics) - }).map_err(|_| { - had_err.store(true, Ordering::Relaxed); + }).map_err(|err| { + self.run_failed(err); Failed })?; for task in more_tasks { - if had_err.load(Ordering::Relaxed) { + if self.had_err.load(Ordering::Relaxed) { return Err(Failed) } if task.defer { tasks.push(Task::Ca(task)) } else { - self.process_ca_task(task, tasks, metrics, had_err)?; + self.process_ca_task(task, tasks, metrics)?; } } Ok(()) } + + /// Marks the run as failed. + fn run_failed(&self, err: RunFailed) { + self.had_err.store(true, Ordering::Relaxed); + if err.is_fatal() { + self.is_fatal.store(true, Ordering::Relaxed); + } + } } @@ -591,7 +620,7 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { cert: &'a Arc, processor: P::PubPoint, repository_index: Option, - ) -> Result { + ) -> Result { Ok(PubPoint { run, cert, processor, repository_index, metrics: Default::default(), @@ -605,7 +634,7 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { pub fn process( self, metrics: &mut RunMetrics, - ) -> Result>, Failed> { + ) -> Result>, RunFailed> { let mut store = self.run.store.pub_point(self.cert)?; if let Some(collector) = self.run.collector.as_ref() { if let Some(collector) = collector.repository(self.cert)? { @@ -615,12 +644,12 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { Ok(res) => return Ok(res), Err(mut this) => { this.metrics = Default::default(); - return this.process_stored(store, metrics) + return Ok(this.process_stored(store, metrics)?) } } } } - self.process_stored(store, metrics) + Ok(self.process_stored(store, metrics)?) } /// Tries to update the stored data and validate at the same time. 
@@ -640,7 +669,7 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { collector: collector::Repository, store: &mut StoredPoint, metrics: &mut RunMetrics, - ) -> Result>, Self>, Failed> { + ) -> Result>, Self>, RunFailed> { // Try to load the manifest from the collector. If there isn’t one, // we are done, too. let collected = match collector.load_object( @@ -767,9 +796,9 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { // Update was aborted. We need to use the store. Ok(Err(self)) } - Err(store::UpdateError::Fatal) => { + Err(store::UpdateError::Failed(err)) => { // We are doomed. - Err(Failed) + Err(err) } } } @@ -784,7 +813,7 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { &mut self, manifest_bytes: Bytes, repository: &collector::Repository, - ) -> Result, Failed> { + ) -> Result, RunFailed> { let manifest = match Manifest::decode( manifest_bytes.clone(), self.run.validation.strict ) { @@ -854,7 +883,7 @@ impl<'a, P: ProcessRun> PubPoint<'a, P> { ee_cert: &ResourceCert, manifest: &ManifestContent, repository: &collector::Repository - ) -> Result, Failed> { + ) -> Result, RunFailed> { // Let’s first get the manifest CRL’s name relative to repo_uri. If // it ain’t relative at all, this is already invalid. let crl_uri = match ee_cert.crl_uri() { diff --git a/src/error.rs b/src/error.rs index ed2dd8f9..184b7d95 100644 --- a/src/error.rs +++ b/src/error.rs @@ -24,6 +24,81 @@ use log::error; #[derive(Clone, Copy, Debug)] pub struct Failed; +impl From for Failed { + fn from(_: Fatal) -> Failed { + Failed + } +} + + +//------------ RunFailed ----------------------------------------------------- + +/// A validation run has failed to complete. +/// +/// This error may be recoverable, which typically happens after some local +/// data corruption has been discovered and the offending was data removed. A +/// new validation run should then be started immediately to hopefully lead +/// to a success. +/// +/// The error may also be fatal in which Routinator should just exit. +#[derive(Clone, Copy, Debug)] +pub struct RunFailed { + /// Was the error fatal? + fatal: bool, +} + +impl RunFailed { + /// Create a new fatal run failure. + pub fn fatal() -> Self { + RunFailed { fatal: true } + } + + /// Create a new “retry” run failure. + pub fn retry() -> Self { + RunFailed { fatal: false } + } + + /// Returns whether the error is fatal. + pub fn is_fatal(self) -> bool { + self.fatal + } + + /// Returns whether the run should be retried. + pub fn should_retry(self) -> bool { + !self.fatal + } +} + +impl From for RunFailed { + fn from(_: Fatal) -> Self { + RunFailed::fatal() + } +} + +impl From for RunFailed { + fn from(_: Failed) -> Self { + RunFailed::fatal() + } +} + + +//------------ Fatal --------------------------------------------------------- + +/// An operation has failed and continuing is pointless. +/// +/// This error types is used to indicate that an operation has failed, +/// diagnostic information has been printed or logged, and continuing is +/// pointless or even dangerous. +#[derive(Clone, Copy, Debug)] +pub struct Fatal; + +// XXX This shouldn’t be here. +impl From for Fatal { + fn from(_: Failed) -> Self { + Self + } +} + //------------ ExitError ----------------------------------------------------- @@ -53,3 +128,17 @@ impl From for ExitError { } } +impl From for ExitError { + fn from(_: RunFailed) -> ExitError { + error!("Fatal error. Exiting."); + ExitError::Generic + } +} + +impl From for ExitError { + fn from(_: Fatal) -> ExitError { + error!("Fatal error. 
Exiting."); + ExitError::Generic + } +} + diff --git a/src/operation.rs b/src/operation.rs index fdf2a5f3..3ac29e1b 100644 --- a/src/operation.rs +++ b/src/operation.rs @@ -31,7 +31,7 @@ use tokio::sync::oneshot; #[cfg(feature = "rta")] use crate::rta; use crate::{output, validity}; use crate::config::Config; -use crate::error::{ExitError, Failed}; +use crate::error::{ExitError, Failed, RunFailed}; use crate::http::http_listener; use crate::metrics::{SharedRtrServerMetrics}; use crate::output::{Output, OutputFormat}; @@ -249,6 +249,7 @@ impl Server { validation.ignite()?; let join = thread::spawn(move || { + let mut can_retry = true; let err = loop { if let Some(log) = log.as_ref() { log.start(); @@ -257,13 +258,39 @@ impl Server { process.config(), true ) { Ok(exceptions) => { - if Self::process_once( + match Self::process_once( process.config(), &validation, &history, &mut notify, exceptions, - ).is_err() { - break Err(Failed); + ) { + Ok(()) => { + history.read().refresh_wait() + } + Err(err) => { + if err.should_retry() { + if can_retry { + if validation.sanitize().is_err() { + break Err(Failed) + } + info!( + "Validation failed but \ + can be retried." + ); + can_retry = false; + Duration::from_secs(0) + } + else { + error!( + "Retried validation failed again." + ); + break Err(Failed); + } + } + else { + break Err(Failed); + } + } } - history.read().refresh_wait() + } Err(_) => { error!( @@ -359,7 +386,7 @@ impl Server { history: &SharedHistory, notify: &mut NotifySender, exceptions: LocalExceptions, - ) -> Result<(), Failed> { + ) -> Result<(), RunFailed> { info!("Starting a validation run."); history.mark_update_start(); let (report, metrics) = ValidationReport::process(engine, config)?; @@ -550,9 +577,30 @@ impl Vrps { engine.ignite()?; process.switch_logging(false, false)?; let exceptions = LocalExceptions::load(process.config(), true)?; - let (report, mut metrics) = ValidationReport::process( - &engine, process.config(), - )?; + let (report, mut metrics) = { + // Retry once if we get a non-fatal error. + let mut once = false; + + loop { + match ValidationReport::process(&engine, process.config()) { + Ok(res) => break res, + Err(err) => { + if err.should_retry() { + if once { + error!( + "Restarted run failed again. Aborting." + ); + } + if engine.sanitize().is_ok() { + once = true; + continue + } + } + return Err(ExitError::Generic) + } + } + } + }; let vrps = Arc::new(report.into_snapshot(&exceptions, &mut metrics)); let rsync_complete = metrics.rsync_complete(); let metrics = Arc::new(metrics); diff --git a/src/payload/validation.rs b/src/payload/validation.rs index cb6f3aef..c6fa5674 100644 --- a/src/payload/validation.rs +++ b/src/payload/validation.rs @@ -34,7 +34,7 @@ use rpki::rtr::payload::{Aspa, RouteOrigin, RouterKey}; use rpki::rtr::pdu::{ProviderAsns, RouterKeyInfo}; use crate::config::{Config, FilterPolicy}; use crate::engine::{CaCert, Engine, ProcessPubPoint, ProcessRun}; -use crate::error::Failed; +use crate::error::{Failed, RunFailed}; use crate::metrics::{Metrics, PayloadMetrics, VrpMetrics}; use crate::slurm::LocalExceptions; use super::info::{PayloadInfo, PublishInfo}; @@ -95,7 +95,7 @@ impl ValidationReport { /// Creates a new validation report by running the engine. 
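[Editorial note: the server loop and the one-shot `Vrps` path above implement the same policy — on a retryable failure, sanitize the stored data and run exactly once more. The intended policy factored out, a sketch with a hypothetical signature:]

    fn run_with_retry<T, E>(
        mut run: impl FnMut() -> Result<T, E>,
        should_retry: impl Fn(&E) -> bool,
        sanitize: impl FnOnce() -> Result<(), E>,
    ) -> Result<T, E> {
        match run() {
            Ok(res) => Ok(res),
            Err(err) => {
                // Retry exactly once, and only if the error is retryable
                // and cleaning up the local data succeeds.
                if should_retry(&err) && sanitize().is_ok() {
                    run()
                } else {
                    Err(err)
                }
            }
        }
    }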
pub fn process( engine: &Engine, config: &Config, - ) -> Result<(Self, Metrics), Failed> { + ) -> Result<(Self, Metrics), RunFailed> { let report = Self::new(config); let mut run = engine.start(&report)?; run.process()?; diff --git a/src/rta.rs b/src/rta.rs index 0cbbdfd2..34fb24f6 100644 --- a/src/rta.rs +++ b/src/rta.rs @@ -8,7 +8,7 @@ use rpki::repository::rta::{ResourceTaggedAttestation, Rta}; use rpki::repository::tal::{Tal, TalUri}; use crate::config::Config; use crate::engine::{CaCert, ProcessPubPoint, ProcessRun, Engine}; -use crate::error::Failed; +use crate::error::{Failed, RunFailed}; //------------ ValidationReport ---------------------------------------------- @@ -35,7 +35,7 @@ impl<'a> ValidationReport<'a> { pub fn process( &self, engine: &Engine, - ) -> Result<(), Failed> { + ) -> Result<(), RunFailed> { let mut run = engine.start(self)?; run.process()?; run.cleanup()?; diff --git a/src/store.rs b/src/store.rs index 502f1b2a..23158873 100644 --- a/src/store.rs +++ b/src/store.rs @@ -79,7 +79,7 @@ use rpki::uri; use crate::collector; use crate::config::Config; use crate::engine::CaCert; -use crate::error::Failed; +use crate::error::{Failed, Fatal, RunFailed}; use crate::metrics::Metrics; use crate::utils::fatal; use crate::utils::binio::{Compose, Parse, ParseError}; @@ -146,6 +146,13 @@ impl Store { }) } + /// Sanitizes the stored data. + /// + /// Currently doesn’t do anything. + pub fn sanitize(&self) -> Result<(), Fatal> { + Ok(()) + } + /// Start a validation run with the store. pub fn start(&self) -> Run { Run::new(self) @@ -761,7 +768,7 @@ impl<'a> StoredPoint<'a> { "Fatal: failed to write to file {}: {}", tmp_path.display(), err ); - return Err(UpdateError::Fatal) + return Err(UpdateError::fatal()) } let tmp_object_start = match tmp_file.stream_position() { Ok(some) => some, @@ -770,7 +777,7 @@ impl<'a> StoredPoint<'a> { "Fatal: failed to get position in file {}: {}", tmp_path.display(), err ); - return Err(UpdateError::Fatal) + return Err(UpdateError::fatal()) } }; @@ -782,7 +789,7 @@ impl<'a> StoredPoint<'a> { "Fatal: failed to write to file {}: {}", tmp_path.display(), err ); - return Err(UpdateError::Fatal) + return Err(UpdateError::fatal()) } } Ok(None) => break, @@ -811,7 +818,7 @@ impl<'a> StoredPoint<'a> { "Fatal: failed to position file {}: {}", self.path.display(), err ); - return Err(UpdateError::Fatal) + return Err(UpdateError::fatal()) } self.file = Some(file); @@ -1117,19 +1124,33 @@ impl StoredObject { //============ Error Types =================================================== +//------------ UpdateError --------------------------------------------------- + /// An error happend while updating a publication point. #[derive(Clone, Copy, Debug)] pub enum UpdateError { /// The update needs to be aborted and rolled back. Abort, - /// Something really bad and fatal happened. - Fatal, + /// Something really bad happened that requires aborting the run. + Failed(RunFailed), +} + +impl UpdateError { + pub fn fatal() -> Self { + UpdateError::Failed(RunFailed::fatal()) + } } impl From for UpdateError { fn from(_: Failed) -> Self { - UpdateError::Fatal + UpdateError::Failed(RunFailed::fatal()) + } +} + +impl From for UpdateError { + fn from(err: RunFailed) -> Self { + UpdateError::Failed(err) } } diff --git a/src/utils/archive.rs b/src/utils/archive.rs new file mode 100644 index 00000000..03cdb49e --- /dev/null +++ b/src/utils/archive.rs @@ -0,0 +1,1825 @@ +//! A simple archive for RRDP repository data. +//! +//! 
This module contains a very simple file archive that is tailored towards
+//! the needs of RRDP. It can be used to store the RPKI objects published via
+//! RRDP in a single file per repository.
+//!
+//! Each archive is a sequence of objects (basically: files, but the term is
+//! confusingly overloaded in this context) preceded by its name and size
+//! and some additional accounting information. An object can be empty and
+//! its space available for use by new objects. When objects are deleted, they
+//! are replaced by such empty objects.
+//!
+//! If a new object needs to be added, an attempt is made to reuse the largest
+//! empty object that it fits into. If there aren’t any empty objects it would
+//! fit into, it is simply appended to the end of the archive.
+//!
+//! If an object needs to be updated and the new version is the same
+//! size, it is just overwritten. If it is smaller, it is overwritten and the
+//! remaining space added as an empty object. If it is larger, it is appended
+//! at the end of the archive and the old version replaced by an empty object.
+//!
+//! For finding objects with a given name, an index is kept. This index is
+//! essentially a hash map with a linked list for each bucket. The basic
+//! index is created at the beginning of the archive. It consists of an array
+//! of pointers to an object whose name hashes into that bucket. Each
+//! object’s header contains a pointer to the next object in the same bucket.
+//! An additional bucket contains a pointer to the first empty object.
+//!
+//! If possible (currently on Unix systems only), the file is memory mapped
+//! for faster access.
+
+use std::{fmt, fs, io, mem};
+use std::borrow::Cow;
+use std::hash::Hasher;
+use std::marker::PhantomData;
+use std::num::{NonZeroU64, NonZeroUsize};
+use std::ops::Range;
+use std::path::Path;
+use std::io::{Read, Seek, SeekFrom, Write};
+use bytes::Bytes;
+use siphasher::sip::SipHasher24;
+use crate::utils::sync::{Mutex, MutexGuard};
+
+
+//------------ Configuration -------------------------------------------------
+
+/// The default number of buckets.
+///
+/// This value has been picked out of thin air for now. We should probably
+/// switch to a model that derives this from the size of a snapshot.
+const DEFAULT_BUCKET_COUNT: usize = 1024;
+
+
+//------------ Archive -------------------------------------------------------
+
+/// A simple object archive in a file.
+///
+/// An archive is backed by a single file and stores any number of objects
+/// identified by a name. Additionally, application-specific meta data can
+/// be stored through the type provided via the `Meta` type argument and the
+/// [`ObjectMeta`] trait.
+///
+/// Objects can be added – which is called _publish_ –, updated, deleted,
+/// and, of course, read – which we call _fetch._
+#[derive(Debug)]
+pub struct Archive<Meta> {
+    /// The physical file.
+    file: Storage,
+
+    /// The meta data of the archive.
+    meta: ArchiveMeta,
+
+    /// A marker for the Meta type argument.
+    marker: PhantomData<Meta>,
+}
+
+impl<Meta> Archive<Meta> {
+    /// Creates a new archive at the given path.
+    ///
+    /// The archive is opened for reading and writing.
+    ///
+    /// If there already is a file at the given path, the function fails.
+    pub fn create(path: impl AsRef<Path>) -> Result<Self, io::Error> {
+        Self::create_with_file(
+            fs::OpenOptions::new()
+                .read(true).write(true).create_new(true)
+                .open(path)?
+        )
+    }
+
+    /// Create a new archive inside a given file.
+    ///
+    /// The file is truncated back to zero length and the header and index
+    /// added.
+    pub fn create_with_file(
+        mut file: fs::File
+    ) -> Result<Self, io::Error> {
+        file.set_len(0)?;
+        let meta = ArchiveMeta::new(DEFAULT_BUCKET_COUNT);
+        file.write_all(&FILE_MAGIC)?;
+        meta.write(&mut file)?;
+        let len = file.stream_position()? + Self::index_size(&meta);
+        file.set_len(len)?;
+
+        Ok(Self {
+            file: Storage::new(file, true)?,
+            meta,
+            marker: PhantomData,
+        })
+    }
+
+    /// Opens an existing archive at the given path.
+    ///
+    /// Returns an error if the file doesn’t start with header and index.
+    pub fn open(
+        path: impl AsRef<Path>, writable: bool
+    ) -> Result<Self, OpenError> {
+        let mut file =
+            fs::OpenOptions::new().read(true).write(writable).open(path)?;
+        let mut magic = [0; MAGIC_SIZE];
+        file.read_exact(&mut magic)?;
+        if magic != FILE_MAGIC {
+            return Err(ArchiveError::Corrupt.into())
+        }
+        let meta = ArchiveMeta::read(&mut file)?;
+
+        Ok(Self {
+            file: Storage::new(file, writable)?,
+            meta,
+            marker: PhantomData,
+        })
+    }
+
+    /// Verifies the consistency of an archive.
+    ///
+    /// The method traverses the entire archive and makes sure that the
+    /// entire file is covered by objects and that these objects aren’t
+    /// overlapping.
+    pub fn verify(&self) -> Result<(), ArchiveError> {
+        // We’re going to collect a list of all encountered objects in here.
+        // Items are pairs of the start position and the length.
+        // At the end we check that they form a consecutive sequence.
+        let mut objects = Vec::new();
+
+        // Step 1. Go over each index bucket and collect all the objects.
+        //         Check that the name hashes correctly.
+        for idx in 0..usize_to_u64(self.meta.bucket_count) {
+            let mut start = self.get_index(idx)?;
+            while let Some(pos) = start {
+                let (header, name) = ObjectHeader::read_with_name(
+                    &self.file, pos.into()
+                )?;
+                if self.hash_name(&name) != idx {
+                    return Err(ArchiveError::Corrupt)
+                }
+                objects.push((u64::from(pos), header.size));
+                start = header.next;
+            }
+        }
+
+        // Step 2. Go over the empty space.
+        let mut start = self.get_empty_index()?;
+        while let Some(pos) = start {
+            let header = ObjectHeader::read(&self.file, pos.into())?;
+            objects.push((u64::from(pos), header.size));
+            start = header.next;
+        }
+
+        // Step 3. Check that the objects form a consecutive sequence.
+        objects.sort_by(|left, right| left.0.cmp(&right.0));
+
+        for window in objects.windows(2) {
+            if window[1].0 != window[0].0 + window[0].1 {
+                return Err(ArchiveError::Corrupt)
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns an iterator over all the objects in the archive.
+    ///
+    /// The iterator will _not_ traverse objects in any kind of order.
+    pub fn objects(&self) -> Result<ObjectsIter<Meta>, ArchiveError> {
+        ObjectsIter::new(self)
+    }
+}
+
+/// # Access to specific objects
+///
+impl<Meta: ObjectMeta> Archive<Meta> {
+    /// Returns the content of the object with the given name.
+    ///
+    /// Assumes that the object exists and returns an error if not.
+    ///
+    /// The method returns borrowed data if the archive is currently memory
+    /// mapped or owned data otherwise.
+    pub fn fetch(
+        &self,
+        name: &[u8],
+    ) -> Result<Cow<[u8]>, FetchError> {
+        let hash = self.hash_name(name);
+        let found = match self.find(hash, name)? {
+            Some(found) => found,
+            None => return Err(FetchError::NotFound),
+        };
+        self.file.read(found.data_start::<Meta>(), |read| {
+            Ok(read.read_slice(found.header.data_size::<Meta>()?)?)
+        })
+    }
+
+    /// Returns the content of the object with the given name as bytes.
+    ///
+    /// Assumes that the object exists and returns an error if not.
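+    ///
+    /// For example (hypothetical object name, error handling elided):
+    ///
+    /// ```ignore
+    /// archive.publish(b"foo", &meta, b"content")?;
+    /// assert_eq!(archive.fetch_bytes(b"foo")?.as_ref(), b"content");
+    /// ```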
+    pub fn fetch_bytes(
+        &self,
+        name: &[u8],
+    ) -> Result<Bytes, FetchError> {
+        self.fetch(name).map(|res| {
+            match res {
+                Cow::Borrowed(slice) => Bytes::copy_from_slice(slice),
+                Cow::Owned(vec) => vec.into()
+            }
+        })
+    }
+
+    /// Fetches the contents of an object.
+    ///
+    /// The object is identified by its `name`. The closure `check` can be
+    /// used to verify that the object has the expected additional
+    /// properties stored in the meta data.
+    ///
+    /// Upon success, the contents will be returned as a cow. This will be
+    /// a slice of the memory mapped contents of the backing file if this is
+    /// available and a vec otherwise.
+    ///
+    /// The method will return an error if the file does not exist. It will
+    /// also return an error if the `check` closure refuses the object.
+    /// Finally, it will return an error if the archive is discovered to be
+    /// broken or cannot be accessed.
+    pub fn fetch_if(
+        &self,
+        name: &[u8],
+        check: impl FnOnce(&Meta) -> Result<(), Meta::ConsistencyError>,
+    ) -> Result<Cow<[u8]>, AccessError<Meta::ConsistencyError>> {
+        let hash = self.hash_name(name);
+        let found = match self.find(hash, name)? {
+            Some(found) => found,
+            None => return Err(AccessError::NotFound),
+        };
+        self.file.read(found.meta_start(), |read| {
+            check(
+                &Meta::read(read)?
+            ).map_err(AccessError::Inconsistent)?;
+            Ok(read.read_slice(found.header.data_size::<Meta>()?)?)
+        })
+    }
+
+    /// Publishes (i.e., adds) a new object.
+    ///
+    /// The object will be identified by the given `name` and carry the
+    /// given `meta` data and contents `data`.
+    ///
+    /// The method will return an error if there already is an object by
+    /// `name`. It will also error if the archive is found to be broken or
+    /// cannot be accessed.
+    pub fn publish(
+        &mut self, name: &[u8], meta: &Meta, data: &[u8]
+    ) -> Result<(), PublishError> {
+        let hash = self.hash_name(name);
+        if self.find(hash, name)?.is_some() {
+            return Err(PublishError::AlreadyExists)
+        }
+        match self.find_empty(name, data)? {
+            Some((empty, pos)) => {
+                self.publish_replace(hash, name, meta, data, empty, pos)?
+            }
+            None => self.publish_append(hash, name, meta, data)?,
+        }
+        Ok(())
+    }
+
+    /// Publishes a new object in the space of the given empty object.
+    ///
+    /// This assumes that the object fits and that there is either no space
+    /// at the end or that there is enough space to add at least an object
+    /// header.
+    ///
+    /// The empty space starts at `start`. Its previously used object header
+    /// is provided through `empty`, which includes the size as well as the
+    /// next pointer to keep the chain intact.
+    fn publish_replace(
+        &mut self,
+        hash: u64, name: &[u8], meta: &Meta, data: &[u8],
+        mut empty: ObjectHeader, start: NonZeroU64,
+    ) -> Result<(), ArchiveError> {
+        self.unlink_empty(start.into(), empty.next)?;
+        let empty_end = u64::from(start) + empty.size;
+        let head = ObjectHeader::new(
+            Self::object_size(name, data), self.get_index(hash)?, name
+        );
+        let object_end = self.write_object(
+            start.into(), head, name, meta, data
+        )?;
+        self.set_index(hash, start.into())?;
+        if empty_end > object_end {
+            empty.size = empty_end - object_end;
+            assert!(empty.size >= ObjectHeader::SIZE);
+            empty.next = self.get_empty_index()?;
+            empty.write(&mut self.file, object_end)?;
+            self.set_empty_index(NonZeroU64::new(object_end))?;
+        }
+        Ok(())
+    }
+
+    /// Publishes a new object by appending it to the end of the archive.
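+    ///
+    /// (This is the fallback for `publish` when `find_empty` finds no
+    /// empty object large enough; see `publish_replace` for the reuse
+    /// path.)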
+    fn publish_append(
+        &mut self, hash: u64, name: &[u8], meta: &Meta, data: &[u8]
+    ) -> Result<(), ArchiveError> {
+        let start = self.file.size;
+        let head = ObjectHeader::new(
+            Self::object_size(name, data), self.get_index(hash)?, name
+        );
+        self.write_object(start, head, name, meta, data)?;
+        self.set_index(hash, NonZeroU64::new(start))?;
+        Ok(())
+    }
+
+    /// Updates an object with new meta data and content.
+    ///
+    /// The `check` closure receives the meta data of the current object
+    /// and can be used to verify that the current meta data fulfills
+    /// certain requirements or return a consistency error otherwise.
+    ///
+    /// The method will return an error if there is no object with `name`.
+    /// It will also return an error if the `check` closure fails or if the
+    /// archive is broken or cannot be accessed.
+    pub fn update(
+        &mut self,
+        name: &[u8], meta: &Meta, data: &[u8],
+        check: impl FnOnce(&Meta) -> Result<(), Meta::ConsistencyError>,
+    ) -> Result<(), AccessError<Meta::ConsistencyError>> {
+        let hash = self.hash_name(name);
+        let found = match self.find(hash, name)? {
+            Some(found) => found,
+            None => return Err(AccessError::NotFound),
+        };
+        check(
+            &self.file.read(found.meta_start(), |read| Meta::read(read))?
+        ).map_err(AccessError::Inconsistent)?;
+
+        let new_size = Self::object_size(name, data);
+        if Self::fits(found.header.size, new_size) {
+            // We can squeeze the new object data into its current space.
+            ObjectHeader::update_size(found.start, new_size, &mut self.file)?;
+            self.file.write(found.meta_start(), |write| {
+                meta.write(write)?;
+                write.write(data)
+            })?;
+            // If there’s empty space, we need to mark and add that.
+            let empty_size = found.header.size - new_size;
+            if empty_size > 0 {
+                self.create_empty(
+                    found.start + new_size,
+                    empty_size,
+                )?;
+            }
+        }
+        else {
+            self.delete_found(hash, found)?;
+            self.publish_append(hash, name, meta, data)?;
+        }
+        Ok(())
+    }
+
+    /// Deletes an object.
+    ///
+    /// The `check` closure receives the meta data of the current object
+    /// and can be used to verify that the current meta data fulfills
+    /// certain requirements or return a consistency error otherwise.
+    ///
+    /// The method will return an error if there is no object with `name`.
+    /// It will also return an error if the `check` closure fails or if the
+    /// archive is broken or cannot be accessed.
+    pub fn delete(
+        &mut self,
+        name: &[u8],
+        check: impl FnOnce(&Meta) -> Result<(), Meta::ConsistencyError>,
+    ) -> Result<(), AccessError<Meta::ConsistencyError>> {
+        let hash = self.hash_name(name);
+        let found = match self.find(hash, name)? {
+            Some(found) => found,
+            None => return Err(AccessError::NotFound),
+        };
+        check(
+            &self.file.read(found.meta_start(), |read| Meta::read(read))?
+        ).map_err(AccessError::Inconsistent)?;
+        Ok(self.delete_found(hash, found)?)
+    }
+
+    /// Deletes an object after it has been found.
+    ///
+    /// This unlinks the object from its bucket chain and replaces it with
+    /// an empty object.
+    fn delete_found(
+        &mut self, hash: u64, found: FoundObject
+    ) -> Result<(), ArchiveError> {
+        match found.prev {
+            Some(pos) => {
+                ObjectHeader::update_next(
+                    pos.into(), found.header.next, &mut self.file)?
+            }
+            None => self.set_index(hash, found.header.next)?,
+        }
+        self.create_empty(found.start, found.header.size)?;
+        Ok(())
+    }
+
+    /// Creates an empty object.
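+    ///
+    /// If the space directly following the new empty object is itself an
+    /// empty object, the two are merged into a single one.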
+ fn create_empty( + &mut self, start: u64, mut size: u64 + ) -> Result<(), ArchiveError> { + let next_start = start.saturating_add(size); + if next_start < self.file.size { + let header = ObjectHeader::read(&self.file, next_start)?; + if header.name_len.is_none() { + self.unlink_empty(next_start, header.next)?; + size += header.size; + } + } + ObjectHeader::new_empty(size, self.get_empty_index()?).write( + &mut self.file, start + )?; + self.set_empty_index(NonZeroU64::new(start))?; + Ok(()) + } + + /// Unlinks an empty object from the empty chain. + fn unlink_empty( + &mut self, start: u64, next: Option + ) -> Result<(), ArchiveError> { + let mut curr = self.get_empty_index()?; + let start = NonZeroU64::new(start); + + // We are the start of the chain. + if curr == start { + self.set_empty_index(next)?; + return Ok(()) + } + + // We are further down the chain. + while let Some(pos) = curr { + let header = ObjectHeader::read(&self.file, pos.into())?; + if header.next == start { + ObjectHeader::update_next(pos.into(), next, &mut self.file)?; + return Ok(()) + } + curr = header.next; + } + + // We are not in the chain at all??? + Err(ArchiveError::Corrupt) + } + + /// Finds the start of the object with the given name. + fn find( + &self, hash: u64, name: &[u8] + ) -> Result, ArchiveError> { + let mut start = self.get_index(hash)?; + let mut prev = None; + while let Some(pos) = start { + let (header, object_name) = ObjectHeader::read_with_name( + &self.file, pos.into() + )?; + if name == object_name.as_ref() { + return Ok(Some(FoundObject { + start: pos.into(), + header, + prev, + })) + } + prev = Some(pos); + start = header.next; + } + Ok(None) + } + + /// Finds empty space large enough to contain the given data. + /// + /// Returns `None` if no such space can be found. Otherwise returns + /// the object header of the empty space and the starting position. + fn find_empty( + &self, name: &[u8], data: &[u8] + ) -> Result, ArchiveError> { + let mut start = self.get_empty_index()?; + if start.is_none() { + return Ok(None) + } + let size = Self::object_size(name, data); + let mut candidates = Vec::new(); + while let Some(pos) = start { + let header = ObjectHeader::read(&self.file, pos.into())?; + start = header.next; + if Self::fits(header.size, size) { + candidates.push((header, pos)); + } + } + if candidates.is_empty() { + return Ok(None) + } + candidates.sort_by(|left, right| left.0.size.cmp(&right.0.size)); + Ok(candidates.first().copied()) + } + + /// Writes an object. + fn write_object( + &mut self, start: u64, + head: ObjectHeader, name: &[u8], meta: &Meta, data: &[u8] + ) -> Result { + self.file.write(start, |write| { + head.write_into(write)?; + write.write(name)?; + meta.write(write)?; + write.write(data)?; + Ok(write.pos()?) + }) + } + + /// Returns the size of an object with the given name and content. + fn object_size(name: &[u8], data: &[u8]) -> u64 { + ObjectHeader::SIZE + + usize_to_u64(name.len()) + + usize_to_u64(Meta::SIZE) + + usize_to_u64(data.len()) + } + + /// Returns whether an object fits into a given space. + /// + /// Specifically, checks that an object of a total size of `object_size` + /// (i.e., including header and name and meta) fits into empty space of + /// a total size of `empty_size`. This is true if they are the same or + /// if there is enough space left to add an empty object. 
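+    ///
+    /// For instance, with the 25 byte header used on 64 bit systems, a
+    /// 100 byte object fits into empty space of exactly 100 bytes or of
+    /// 125 bytes and up, but not into 101 to 124 bytes.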
+ fn fits(empty_size: u64, object_size: u64) -> bool { + // Either the object fits exactly or there is enough space to add + // an object header + empty_size == object_size + || empty_size >= object_size + ObjectHeader::SIZE + } +} + + +/// # Access to the Index +/// +impl Archive { + /// The size of a single bucket. + /// + /// This is equal to the size of the integer type we are using for archive + /// positions, i.e., `u64`. + const BUCKET_SIZE: usize = mem::size_of::(); + + /// Returns the hash value for a given name. + /// + /// The returned value will already be taken modulo the number of buckets, + /// i.e., this is actually the bucket index for the name, not really its + /// hash. + fn hash_name(&self, name: &[u8]) -> u64 { + let mut hasher = SipHasher24::new_with_key(&self.meta.hash_key); + hasher.write(name); + hasher.finish() % usize_to_u64(self.meta.bucket_count) + } + + /// Returns the size of the index. + /// + /// There are one more buckets than the archive’s bucket count since that + /// count is without the empty bucket. + fn index_size(meta: &ArchiveMeta) -> u64 { + usize_to_u64( + (meta.bucket_count + 1) * Self::BUCKET_SIZE + ) + } + + /// Returns the archive position of the bucket for `hash`. + fn index_pos(&self, hash: u64) -> u64 { + usize_to_u64(MAGIC_SIZE) + ArchiveMeta::size() + + hash * usize_to_u64(Self::BUCKET_SIZE) + } + + /// Returns the archive position for the empty bucket. + /// + /// The empty bucket lives behind all the other buckets. + fn empty_index_pos(&self) -> u64 { + usize_to_u64(MAGIC_SIZE) + ArchiveMeta::size() + + usize_to_u64(self.meta.bucket_count * Self::BUCKET_SIZE) + } + + /// Returns the archive position of the first object with `hash`. + fn get_index( + &self, hash: u64 + ) -> Result, ArchiveError> { + Ok(NonZeroU64::new( + self.file.read(self.index_pos(hash), |read| read.read_u64())? + )) + } + + /// Returns the archive position of the first empty object. + fn get_empty_index(&self) -> Result, ArchiveError> { + Ok(NonZeroU64::new( + self.file.read(self.empty_index_pos(),|read| read.read_u64())? + )) + } + + /// Updates the archive position of the first object with `hash`. + fn set_index( + &mut self, hash: u64, pos: Option, + ) -> Result<(), ArchiveError> { + self.file.write(self.index_pos(hash), |write| { + write.write_u64(pos.map(Into::into).unwrap_or(0)) + }) + } + + /// Updates the archive position of the first empty object. + fn set_empty_index( + &mut self, pos: Option + ) -> Result<(), ArchiveError> { + self.file.write(self.empty_index_pos(), |write| { + write.write_u64(pos.map(Into::into).unwrap_or(0)) + }) + } +} + + +//------------ ObjectsIter --------------------------------------------------- + +/// An iterator over the objects in an archive. +/// +/// The iterator returns tuples of name, meta, and content. It can be +/// acquired via [`Archive::objects`]. +pub struct ObjectsIter<'a, Meta> { + /// The archive we are operating on. + archive: &'a Archive, + + /// The remaining buckets we haven’t visited yet. + buckets: Range, + + /// The next item in the currently visited bucket. + next: Option, +} + +impl<'a, Meta> ObjectsIter<'a, Meta> { + /// Creates a new iterator. + fn new(archive: &'a Archive) -> Result { + Ok(Self { + archive, + buckets: 1..usize_to_u64(archive.meta.bucket_count), + next: archive.get_index(0)?, + }) + } +} + +impl<'a, Meta: ObjectMeta> ObjectsIter<'a, Meta> { + /// Returns the next item. + /// + /// This method returns the transposed result so we can use the question + /// mark operator. 
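+    ///
+    /// That is, `Ok(None)` signals that the iterator is exhausted, while
+    /// an error is handed through to the caller of `next`.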
+ #[allow(clippy::type_complexity)] + fn transposed_next( + &mut self + ) -> Result, Meta, Cow<'a, [u8]>)>, ArchiveError> { + loop { + if let Some(pos) = self.next { + let (next, res) = self.archive.file.read(pos.into(), |read| { + let header = ObjectHeader::read_from(read)?; + let name_len = match header.name_len { + Some(len) => len, + None => return Err(ArchiveError::Corrupt) + }; + let name = read.read_slice(name_len)?; + let meta = Meta::read(read)?; + let data = read.read_slice(header.data_size::()?)?; + Ok((header.next, (name, meta, data))) + })?; + self.next = next; + return Ok(Some(res)) + } + let idx = match self.buckets.next() { + Some(idx) => idx, + None => return Ok(None) + }; + self.next = self.archive.get_index(idx)?; + } + } +} + +impl<'a, Meta: ObjectMeta> Iterator for ObjectsIter<'a, Meta> { + type Item = Result<(Cow<'a, [u8]>, Meta, Cow<'a, [u8]>), ArchiveError>; + + fn next(&mut self) -> Option { + self.transposed_next().transpose() + } +} + + +//------------ ObjectMeta ---------------------------------------------------- + +/// A type representing meta data of an object. +/// +/// A value of a type of this trait is stored with every object in an archive. +/// Values need to be of fixed size. +pub trait ObjectMeta: Sized { + /// The size of the stored meta data. + /// + /// The `write` method needs to always write this many bytes if + /// successful, and `read` needs to always read this many bytes. + const SIZE: usize; + + /// The error type returned by the check closures. + type ConsistencyError: fmt::Debug; + + /// Write a meta data value. + /// + /// This method must try to write exactly `Self::SIZE` bytes. + fn write(&self, write: &mut StorageWrite) -> Result<(), ArchiveError>; + + /// Read a meta data value. + /// + /// This method must try to read exactly `Self::SIZE` bytes. + fn read(read: &mut StorageRead) -> Result; +} + + +//------------ ArchiveMeta --------------------------------------------------- + +/// The meta data of an archive. +/// +/// This is stored at the beginning of a file right after the magic cookie. +#[derive(Default, Debug)] +struct ArchiveMeta { + /// The key for the hasher. + hash_key: [u8; 16], + + /// The number of hash buckets. + bucket_count: usize, +} + +impl ArchiveMeta { + /// Creates a new value. + /// + /// This uses a random hash key and the given bucket number. + fn new(bucket_count: usize) -> Self { + ArchiveMeta { + hash_key: rand::random(), + bucket_count, + } + } + + /// Returns the size of the encoded archive meta data. + const fn size() -> u64 { + usize_to_u64( + mem::size_of::<[u8; 16]>() + mem::size_of::() + ) + } + + /// Write the data to a file. + fn write(&self, target: &mut impl io::Write) -> Result<(), io::Error> { + target.write_all(&self.hash_key)?; + target.write_all(&self.bucket_count.to_ne_bytes())?; + Ok(()) + } + + /// Reads the data from a file. + fn read(source: &mut impl io::Read) -> Result { + let mut res = Self::default(); + source.read_exact(&mut res.hash_key)?; + let mut buf = [0u8; mem::size_of::()]; + source.read_exact(&mut buf)?; + res.bucket_count = usize::from_ne_bytes(buf); + Ok(res) + } +} + + +//------------ ObjectHeader -------------------------------------------------- + +/// The header of an object. +/// +/// This header is of a fixed size and is followed directly by the name, meta. +/// and content. +#[derive(Clone, Copy, Debug)] +struct ObjectHeader { + /// The size of the object including the header. + size: u64, + + /// The next object of the hash bucket. 
+ next: Option, + + /// The size of the name. + /// + /// If this is `None`, this object is an empty object. + name_len: Option, +} + +impl ObjectHeader { + /// Creates a new object header. + fn new( + size: u64, next: Option, name: &[u8] + ) -> Self { + ObjectHeader { size, next, name_len: Some(name.len()) } + } + + /// Creates a new object header for an empty object. + fn new_empty(size: u64, next: Option) -> Self { + ObjectHeader { size, next, name_len: None } + } + + /// Reads the contents of the header from a storage reader. + fn read_from(read: &mut StorageRead) -> Result { + Ok(Self { + size: read.read_u64()?, + next: NonZeroU64::new(read.read_u64()?), + name_len: read.read_opt_usize()?, + }) + } + + /// Reads the header from the given archive position. + fn read( + storage: &Storage, start: u64 + ) -> Result { + storage.read(start, Self::read_from) + } + + /// Reads the header and name from the given archive position. + fn read_with_name( + storage: &Storage, start: u64 + ) -> Result<(Self, Cow<[u8]>), ArchiveError> { + storage.read(start, |read| { + let header = Self::read_from(read)?; + let name_len = match header.name_len { + Some(len) => len, + None => return Err(ArchiveError::Corrupt), + }; + let name = read.read_slice(name_len)?; + Ok((header, name)) + }) + } + + /// Writes the header into the given storage writer. + fn write_into( + &self, write: &mut StorageWrite + ) -> Result<(), ArchiveError> { + write.write_u64(self.size)?; + write.write_nonzero_u64(self.next)?; + write.write_opt_usize(self.name_len)?; + Ok(()) + } + + /// Writes the header at the given archive position. + fn write( + &self, storage: &mut Storage, start: u64 + ) -> Result<(), ArchiveError> { + storage.write(start, |write| self.write_into(write)) + } + + /// Updates the object size of a header beginning at the given position. + fn update_size( + start: u64, new_size: u64, storage: &mut Storage + ) -> Result<(), ArchiveError> { + storage.write(start, |write| write.write_u64(new_size)) + } + + /// Updates the next pointer of a header beginning at the given position. + fn update_next( + start: u64, new_next: Option, storage: &mut Storage + ) -> Result<(), ArchiveError> { + storage.write( + start + usize_to_u64(mem::size_of::()), + |write| write.write_nonzero_u64(new_next), + ) + } + + /// The written size of the header. + const SIZE: u64 = usize_to_u64( + mem::size_of::() + + mem::size_of::() + + Storage::OPT_USIZE_SIZE + ); + + /// Returns the start of the meta data. + fn meta_start(&self, start: u64) -> u64 { + start + Self::SIZE + opt_usize_to_u64(self.name_len) + } + + /// Returns the start of the content. + fn data_start(&self, start: u64) -> u64 { + start + Self::SIZE + + usize_to_u64(Meta::SIZE) + + opt_usize_to_u64(self.name_len) + } + + /// Returns the size of the data. + fn data_size(&self) -> Result { + let name_len = match self.name_len { + Some(len) => usize_to_u64(len), + None => return Err(ArchiveError::Corrupt) + }; + usize::try_from( + self.size - Self::SIZE- usize_to_u64(Meta::SIZE) - name_len + ).map_err(|_| ArchiveError::Corrupt) + } +} + + +//------------ FoundObject --------------------------------------------------- + +/// Information about an object found in the archive. +/// +/// This is just so we don’t need to juggle tuples all the time. +struct FoundObject { + /// The start position of the object. + start: u64, + + /// The heeader of the object. + header: ObjectHeader, + + /// The start position of the previous object with the same hash. 
+ prev: Option, +} + +impl FoundObject { + /// Returns the start of the meta data. + fn meta_start(&self) -> u64 { + self.header.meta_start(self.start) + } + + /// Returns the start of the content. + fn data_start(&self) -> u64 { + self.header.data_start::(self.start) + } +} + + +//------------ Magic Cookie -------------------------------------------------- +// +// The marker we use for a quick file type check. + +#[cfg(all(target_endian = "little", target_pointer_width = "16"))] +const SYSTEM: u8 = b'A'; + +#[cfg(all(target_endian = "little", target_pointer_width = "32"))] +const SYSTEM: u8 = b'B'; + +#[cfg(all(target_endian = "little", target_pointer_width = "64"))] +const SYSTEM: u8 = b'C'; + +#[cfg(all(target_endian = "big", target_pointer_width = "16"))] +const SYSTEM: u8 = b'D'; + +#[cfg(all(target_endian = "big", target_pointer_width = "32"))] +const SYSTEM: u8 = b'E'; + +#[cfg(all(target_endian = "big", target_pointer_width = "64"))] +const SYSTEM: u8 = b'F'; + +const VERSION: u8 = 0; + +const MAGIC_SIZE: usize = 6; +const FILE_MAGIC: [u8; MAGIC_SIZE] = [ + b'R', b'T', b'N', b'R', VERSION, SYSTEM, +]; + + +//============ Physical File Access ========================================== + +//------------ Storage ------------------------------------------------------- + +/// The underlying storage of an archive. +#[derive(Debug)] +struct Storage { + /// The physical file. + /// + /// This is protected by a mutex so the archive can be shared. + file: Mutex, + + /// The optional memory map. + #[cfg(unix)] + mmap: Option, + + /// Do we need write permissions? + #[cfg(unix)] + writable: bool, + + /// The size of the archive. + size: u64, +} + +impl Storage { + /// Creates a new storage value using the given file. + #[allow(unused_variables)] + pub fn new(file: fs::File, writable: bool) -> Result { + let mut res = Self { + file: Mutex::new(file), + #[cfg(unix)] + mmap: None, + #[cfg(unix)] + writable, + size: 0, + }; + res.mmap()?; + Ok(res) + } + + /// Re-memory maps the storage. + /// + /// You can un-memory map the storage by setting `self.mmap` to `None`. + fn mmap(&mut self) -> Result<(), io::Error> { + #[cfg(unix)] + { + self.mmap = mmapimpl::Mmap::new( + &mut self.file.lock(), self.writable + )?; + if let Some(mmap) = self.mmap.as_ref() { + self.size = mmap.size(); + return Ok(()) + } + } + + let mut file = self.file.lock(); + file.seek(SeekFrom::End(0))?; + self.size = file.stream_position()?; + Ok(()) + } + + /// Starts reading from the storage at the given position. + pub fn read<'s, T, E: From>( + &'s self, + start: u64, + op: impl FnOnce(&mut StorageRead<'s>) -> Result + ) -> Result { + op(&mut StorageRead::new(self, start)?) + } + + /// Starts writing to the storage at the given position. + /// + /// If `start` is equal to the size of the archive, starts appending. + pub fn write( + &mut self, + start: u64, + op: impl FnOnce(&mut StorageWrite) -> Result + ) -> Result { + let mut write = if self.size == start { + StorageWrite::new_append(self)? + } + else { + StorageWrite::new(self, start)? + }; + let res = op(&mut write)?; + if write.finish()? { + self.mmap()?; + } + Ok(res) + } +} + +/// # Stored size constants +/// +/// They live here purely for the naming to make some sort of sense. +impl Storage { + const OPT_USIZE_SIZE: usize + = mem::size_of::() + mem::size_of::(); +} + + +//------------ StorageRead --------------------------------------------------- + +/// Reading data from the underlying storage. 
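+///
+/// A reader either borrows the memory map or holds the lock on the
+/// underlying file for as long as it lives, so it should be dropped as
+/// soon as possible.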
+#[derive(Debug)] +pub struct StorageRead<'a>(ReadInner<'a>); + +/// How are we reading? +#[derive(Debug)] +enum ReadInner<'a> { + /// The storage is memory-mapped and we read from there. + #[cfg(unix)] + Mmap { + /// The memory map. + mmap: &'a mmapimpl::Mmap, + + /// The current read position. + pos: u64, + }, + + /// The storage is not memory-mapped and we read from the file. + File { + file: MutexGuard<'a, fs::File>, + } +} + +impl<'a> StorageRead<'a> { + /// Creates a new storage reader. + fn new(storage: &'a Storage, start: u64) -> Result { + if start > storage.size { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "unexpected EOF" + ).into()) + } + + #[cfg(unix)] + if let Some(mmap) = storage.mmap.as_ref() { + return Ok(StorageRead( + ReadInner::Mmap { mmap, pos: start } + )) + } + + let mut file = storage.file.lock(); + file.seek(SeekFrom::Start(start))?; + Ok(StorageRead( + ReadInner::File { file } + )) + } + + /// Returns the current read position. + pub fn pos(&mut self) -> Result { + match self.0 { + #[cfg(unix)] + ReadInner::Mmap { pos, .. } => Ok(pos), + ReadInner::File { ref mut file } => Ok(file.stream_position()?), + } + } + + /// Reads data into a provided buffer. + pub fn read_into( + &mut self, buf: &mut [u8] + ) -> Result<(), ArchiveError> { + match self.0 { + #[cfg(unix)] + ReadInner::Mmap { mmap, ref mut pos } => { + *pos = mmap.read_into(*pos, buf)?; + Ok(()) + } + ReadInner::File { ref mut file } => { + Ok(file.read_exact(buf)?) + } + } + } + + /// Reads a slice of data. + /// + /// If the storage is memory-mapped, this will return a slice into the + /// mapped region. Otherwise a vec will be allocated. + pub fn read_slice( + &mut self, len: usize, + ) -> Result, ArchiveError> { + match self.0 { + #[cfg(unix)] + ReadInner::Mmap { mmap, ref mut pos } => { + let (res, end) = mmap.read(*pos, len)?; + *pos = end; + Ok(res) + } + ReadInner::File { ref mut file } => { + // XXX This may or may not be sound. We’re not using read_exact + // just to be a little more sure? + let mut buf = Vec::with_capacity(len); + let mut len = len; + unsafe { + buf.set_len(len); + let mut buf = buf.as_mut_slice(); + while len > 0 { + let read = file.read(buf)?; + if read == 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "unexpected end of file" + ).into()) + } + + // Let’s not panic if Read::read is broken and rather + // error out. + buf = match buf.get_mut(read..) { + Some(buf) => buf, + None => { + return Err(io::Error::new( + io::ErrorKind::Other, + "read claimed to read beyond buffer len" + ).into()) + } + }; + + len -= read; + } + } + Ok(buf.into()) + } + } + } + + /// Reads a byte array. + pub fn read_array( + &mut self + ) -> Result<[u8; N], ArchiveError> { + let mut res = [0; N]; + self.read_into(&mut res)?; + Ok(res) + } + + /// Reads a `usize`. + pub fn read_usize(&mut self) -> Result { + Ok(usize::from_ne_bytes(self.read_array()?)) + } + + /// Reads an optional `usize`. + /// + /// We don’t do any optimisations here and instead store this is an + /// one-byte boolean and, if that is 1, the length as a usize. + pub fn read_opt_usize(&mut self) -> Result, ArchiveError> { + let opt = self.read_array::<1>()?; + let size = self.read_usize()?; + match opt[0] { + 0 => { + if size != 0 { + Err(ArchiveError::Corrupt) + } + else { + Ok(None) + } + } + 1 => Ok(Some(size)), + _ => Err(ArchiveError::Corrupt), + } + } + + /// Reads a `u64`. 
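+    ///
+    /// Like all integers in the archive, the value is stored in native
+    /// byte order.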
+ pub fn read_u64(&mut self) -> Result { + Ok(u64::from_ne_bytes(self.read_array()?)) + } +} + + +//------------ StorageWrite -------------------------------------------------- + +/// Writing data to storage. +#[derive(Debug)] +pub struct StorageWrite<'a>(WriteInner<'a>); + +/// How are we writing, exactly? +#[derive(Debug)] +enum WriteInner<'a> { + /// We are writing into a memory mapped region. + #[cfg(unix)] + Mmap { + /// The memory-map. + mmap: &'a mut mmapimpl::Mmap, + + /// The current write position. + pos: u64, + }, + + /// We are overwriting a portion of the underlying file. + Overwrite { + file: MutexGuard<'a, fs::File>, + }, + + /// We are appending to the underlying file. + Append { + file: MutexGuard<'a, fs::File>, + }, +} + +impl<'a> StorageWrite<'a> { + /// Creates a new storage writer for overwriting existing data.. + fn new( + storage: &'a mut Storage, pos: u64 + ) -> Result { + if pos >= storage.size { + return Err(ArchiveError::Corrupt) + } + + #[cfg(unix)] + match storage.mmap.as_mut() { + Some(mmap) => { + Ok(Self(WriteInner::Mmap { mmap, pos, })) + } + None => { + let mut file = storage.file.lock(); + file.seek(SeekFrom::Start(pos))?; + Ok(Self(WriteInner::Overwrite { file })) + } + } + + #[cfg(not(unix))] + { + let mut file = storage.file.lock(); + file.seek(SeekFrom::Start(pos))?; + Ok(Self(WriteInner::Overwrite { file })) + } + } + + /// Creates a new storage writer for appending data. + fn new_append(storage: &'a mut Storage) -> Result { + #[cfg(unix)] + if let Some(mmap) = storage.mmap.take() { + drop(mmap) + } + let mut file = storage.file.lock(); + file.seek(SeekFrom::End(0))?; + Ok(Self(WriteInner::Append { file })) + } + + /// Finishes writing. + /// + /// Returns whether a memory-map needs to be renewed. + fn finish(self) -> Result { + match self.0 { + #[cfg(unix)] + WriteInner::Mmap { mmap, .. } => { + mmap.sync()?; + Ok(false) + } + WriteInner::Overwrite { mut file } => { + file.flush()?; + Ok(false) + } + WriteInner::Append { mut file } => { + file.flush()?; + Ok(true) + } + } + } + + /// Returns the current writing position. + pub fn pos(&mut self) -> Result { + match self.0 { + #[cfg(unix)] + WriteInner::Mmap { pos, .. } => Ok(pos), + WriteInner::Overwrite { ref mut file } => file.stream_position(), + WriteInner::Append { ref mut file } => file.stream_position(), + } + } + + /// Writes data to storage. + /// + /// Note that because a storage writer either overwrites existing data or + /// appends new data, this may fail with an EOF error if you reach the + /// end of the file in the overwrite case. + pub fn write( + &mut self, data: &[u8] + ) -> Result<(), ArchiveError> { + match self.0 { + #[cfg(unix)] + WriteInner::Mmap { ref mut mmap, ref mut pos } => { + *pos = mmap.write(*pos, data)?; + Ok(()) + } + WriteInner::Overwrite { ref mut file } => { + Ok(file.write_all(data)?) + } + WriteInner::Append { ref mut file, .. } => { + Ok(file.write_all(data)?) + } + } + } + + /// Writes a `usize` to storage. + pub fn write_usize(&mut self, value: usize) -> Result<(), ArchiveError> { + self.write(&value.to_ne_bytes()) + } + + /// Write an optional `usize` to storage. + pub fn write_opt_usize( + &mut self, value: Option + ) -> Result<(), ArchiveError> { + match value { + Some(value) => { + self.write(b"\x01")?; + self.write_usize(value)?; + } + None => { + self.write(b"\0")?; + self.write_usize(0)?; + } + } + Ok(()) + } + + /// Writes a `u64` to storage. 
+ pub fn write_u64(&mut self, value: u64) -> Result<(), ArchiveError> { + self.write(&value.to_ne_bytes()) + } + + /// Writes a `Option` to storage. + pub fn write_nonzero_usize( + &mut self, value: Option + ) -> Result<(), ArchiveError> { + self.write(&value.map(Into::into).unwrap_or(0).to_ne_bytes()) + } + + /// Writes a `Option` to storage. + pub fn write_nonzero_u64( + &mut self, value: Option + ) -> Result<(), ArchiveError> { + self.write(&value.map(Into::into).unwrap_or(0).to_ne_bytes()) + } + +} + + +//------------ Mmap ----------------------------------------------------------# + +#[cfg(unix)] +mod mmapimpl { + use std::{fs, io, slice}; + use std::borrow::Cow; + use std::ffi::c_void; + use std::io::{Seek, SeekFrom}; + use nix::sys::mman::{MapFlags, MsFlags, ProtFlags, mmap, msync, munmap}; + + + /// A memory-mapped file. + #[derive(Debug)] + pub struct Mmap { + /// The pointer to the start of the memory. + ptr: *mut c_void, + + /// The size of the memory, + len: usize, + } + + impl Mmap { + /// Creates a new value mapping the given file and mode. + pub fn new( + file: &mut fs::File, + writable: bool, + ) -> Result, io::Error> { + file.seek(SeekFrom::End(0))?; + let size = file.stream_position()?; + file.rewind()?; + let size = match usize::try_from(size).and_then(TryInto::try_into) { + Ok(size) => size, + Err(_) => return Ok(None) + }; + let ptr = unsafe { + mmap( + None, size, + if writable { + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE + } + else { + ProtFlags::PROT_READ + }, + MapFlags::MAP_SHARED, + Some(file), + 0 + )? + }; + Ok(Some(Mmap { ptr, len: size.into() })) + } + + /// Returns the size of the mapped file. + pub fn size(&self) -> u64 { + super::usize_to_u64(self.len) + } + } + + impl Drop for Mmap { + fn drop(&mut self) { + unsafe { + let _ = munmap(self.ptr, self.len); // XXX Error handling? + } + } + } + + impl Mmap { + /// Returns the whole memory map. + fn as_slice(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.ptr as *const u8, self.len) } + } + + /// Returns the whole memory map mutably. + fn as_slice_mut(&mut self) -> &mut [u8] { + unsafe { slice::from_raw_parts_mut(self.ptr as *mut u8, self.len) } + } + } + + impl Mmap { + /// Reads data into the given buffer. + pub fn read_into( + &self, start: u64, buf: &mut [u8] + ) -> Result { + let (slice, end) = self.read(start, buf.len())?; + buf.copy_from_slice(slice.as_ref()); + Ok(end) + } + + /// Returns a cow of the given data. + /// + /// This will always be borrowed. + pub fn read( + &self, start: u64, len: usize, + ) -> Result<(Cow<[u8]>, u64), io::Error> { + let start = match usize::try_from(start) { + Ok(start) => start, + Err(_) => { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "unexpected EOF" + )) + } + }; + let end = match start.checked_add(len) { + Some(end) => end, + None => { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "unexpected EOF" + )) + } + }; + if end > self.len { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "unexpected EOF" + )) + } + Ok((self.as_slice()[start..end].into(), super::usize_to_u64(end))) + } + + /// Writes the given data starting at the given position. + /// + /// The data needs to fully fit into the current memory block. 
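+        ///
+        /// Returns the position of the first byte following the written
+        /// data.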
+ pub fn write( + &mut self, start: u64, data: &[u8] + ) -> Result { + let start = match usize::try_from(start) { + Ok(start) => start, + Err(_) => { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "unexpected EOF" + )) + } + }; + let end = match start.checked_add(data.len()) { + Some(end) => end, + None => { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "unexpected EOF" + )) + } + }; + if end > self.len { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, "unexpected EOF" + )) + } + self.as_slice_mut()[start..end].copy_from_slice(data); + Ok(super::usize_to_u64(end)) + } + + /// Synchronizes the memory mapped data onto disk. + pub fn sync(&self) -> Result<(), io::Error> { + unsafe { + Ok(msync(self.ptr, self.len, MsFlags::MS_ASYNC)?) + } + } + } + + unsafe impl Sync for Mmap { } + unsafe impl Send for Mmap { } +} + + +//============ Helper Function =============================================== + +/// Converts a usize to a u64. +/// +/// This will panic on systems where a usize doesn’t fit into a u64 if the +/// value is too big. +const fn usize_to_u64(value: usize) -> u64 { + #[cfg(not(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64", + )))] + assert!(value <= u64::MAX as usize); + value as u64 +} + +/// Converts an optional usize into a u64. +fn opt_usize_to_u64(value: Option) -> u64 { + usize_to_u64(value.map(Into::into).unwrap_or(0)) +} + + +//============ Error Types =================================================== + +//------------ ArchiveError -------------------------------------------------- + +/// An error happened while trying to access the archive. +#[derive(Debug)] +pub enum ArchiveError { + /// The archive is corrupt and cannot be used any more. + Corrupt, + + /// An IO error happened while accessing the underlying file. + Io(io::Error), +} + +impl From for ArchiveError { + fn from(err: io::Error) -> Self { + Self::Io(err) + } +} + +impl fmt::Display for ArchiveError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ArchiveError::Corrupt => f.write_str("archive corrupted"), + ArchiveError::Io(ref err) => write!(f, "{}", err) + } + } +} + +//------------ OpenError ----------------------------------------------------- + +/// An error happened while opening an existing archive. +#[derive(Debug)] +pub enum OpenError { + /// The archive does not exist. + NotFound, + + /// An error happened while trying to access the archive. + Archive(ArchiveError), +} + +impl From for OpenError { + fn from(err: io::Error) -> Self { + ArchiveError::Io(err).into() + } +} + +impl From for OpenError { + fn from(err: ArchiveError) -> Self { + match err { + ArchiveError::Io(err) if matches!( + err.kind(), io::ErrorKind::NotFound + ) => Self::NotFound, + _ => Self::Archive(err), + } + } +} + +impl fmt::Display for OpenError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + OpenError::NotFound => f.write_str("not found"), + OpenError::Archive(ref err) => write!(f, "{}", err), + } + } +} + + +//------------ PublishError -------------------------------------------------- + +/// An error happened while publishing an object. +#[derive(Debug)] +pub enum PublishError { + /// The object already exists. + AlreadyExists, + + /// An error happened while trying to access the archive. 
+    Archive(ArchiveError),
+}
+
+impl From<ArchiveError> for PublishError {
+    fn from(err: ArchiveError) -> Self {
+        Self::Archive(err)
+    }
+}
+
+
+//------------ AccessError ---------------------------------------------------
+
+/// An error happened while accessing an object.
+#[derive(Debug)]
+pub enum AccessError<T> {
+    /// The object does not exist.
+    NotFound,
+
+    /// The object’s meta data is wrong.
+    Inconsistent(T),
+
+    /// An error happened while trying to access the archive.
+    Archive(ArchiveError),
+}
+
+impl<T> From<ArchiveError> for AccessError<T> {
+    fn from(err: ArchiveError) -> Self {
+        Self::Archive(err)
+    }
+}
+
+
+//------------ FetchError ----------------------------------------------------
+
+/// An error happened while fetching an object.
+#[derive(Debug)]
+pub enum FetchError {
+    /// The object does not exist.
+    NotFound,
+
+    /// An error happened while trying to access the archive.
+    Archive(ArchiveError),
+}
+
+impl From<ArchiveError> for FetchError {
+    fn from(err: ArchiveError) -> Self {
+        Self::Archive(err)
+    }
+}
+
+
+//============ Testing =======================================================
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::collections::HashMap;
+
+    #[derive(Clone, Copy, Debug)]
+    enum Op {
+        Publish { name: &'static [u8], data: &'static [u8] },
+        Update { name: &'static [u8], data: &'static [u8] },
+        Delete { name: &'static [u8] },
+    }
+
+    use self::Op::*;
+
+    impl ObjectMeta for () {
+        const SIZE: usize = 4;
+        type ConsistencyError = ();
+
+        fn write(
+            &self, write: &mut StorageWrite
+        ) -> Result<(), ArchiveError> {
+            write.write(b"abcd")
+        }
+
+        fn read(
+            read: &mut StorageRead
+        ) -> Result<Self, ArchiveError> {
+            let slice = read.read_slice(4).unwrap();
+            assert_eq!(slice.as_ref(), b"abcd");
+            Ok(())
+        }
+    }
+
+    fn check_archive(
+        archive: &Archive<()>,
+        content: &HashMap<&'static [u8], &'static [u8]>,
+    ) {
+        archive.verify().unwrap();
+        let mut content = content.clone();
+        for item in archive.objects().unwrap() {
+            let (name, _, data) = item.unwrap();
+            assert_eq!(
+                content.remove(name.as_ref()),
+                Some(data.as_ref())
+            );
+        }
+        assert!(content.is_empty());
+    }
+
+    fn run_archive(ops: impl IntoIterator<Item = Op>) {
+        let mut archive = Archive::create_with_file(
+            tempfile::tempfile().unwrap()
+        ).unwrap();
+        let mut content = HashMap::new();
+
+        for item in ops {
+            match item {
+                Op::Publish { name, data } => {
+                    assert!(content.insert(name, data).is_none());
+                    archive.publish(name, &(), data).unwrap();
+                    check_archive(&archive, &content);
+                    assert_eq!(
+                        archive.fetch(name).unwrap().as_ref(),
+                        data
+                    );
+                }
+                Op::Update { name, data } => {
+                    assert!(content.insert(name, data).is_some());
+                    archive.update(name, &(), data, |_| Ok(())).unwrap();
+                    assert_eq!(
+                        archive.fetch(name).unwrap().as_ref(),
+                        data
+                    );
+                }
+                Op::Delete { name } => {
+                    assert!(content.remove(name).is_some());
+                    archive.delete(name, |_| Ok(())).unwrap();
+                    assert!(matches!(
+                        archive.fetch(name),
+                        Err(FetchError::NotFound)
+                    ));
+                }
+            }
+
+            check_archive(&archive, &content);
+        }
+    }
+
+    #[test]
+    fn empty_archive() {
+        run_archive([])
+    }
+
+    #[test]
+    fn publish_replace() {
+        run_archive([
+            Publish { name: b"1", data: b"bar" },
+            Publish { name: b"2", data: &[0; 1024] },
+            Publish { name: b"3", data: b"aaa" },
+            Delete { name: b"2" },
+            Publish { name: b"4", data: b"bar" },
+            Update { name: b"4", data: b"bar" },
+        ])
+    }
+}
+
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 0874df30..91a688cb 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,5 +1,6 @@
 //! Various useful things.
+pub mod archive; pub mod binio; pub mod date; pub mod dump; diff --git a/src/utils/sync.rs b/src/utils/sync.rs index 6f60ceda..59378f63 100644 --- a/src/utils/sync.rs +++ b/src/utils/sync.rs @@ -2,7 +2,6 @@ use std::sync::{Mutex as StdMutex, RwLock as StdRwLock}; - pub use std::sync::{MutexGuard, RwLockReadGuard, RwLockWriteGuard};