diff --git a/Cargo.toml b/Cargo.toml index efcbda9..dc54a72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,3 +43,13 @@ tokio = { version = "1", features = ["test-util", "macros", "rt"] } [package.metadata.docs.rs] all-features = true + +[lints.rust] +unsafe_code = "forbid" +unused_qualifications = "warn" + +[lints.clippy] +pedantic = { level = "warn", priority = -1 } +missing_errors_doc = "allow" +module_name_repetitions = "allow" +similar_names = "allow" diff --git a/src/async_reader.rs b/src/async_reader.rs index bb9412f..48c56bd 100644 --- a/src/async_reader.rs +++ b/src/async_reader.rs @@ -1,3 +1,7 @@ +// FIXME: This seems like a bug - there are lots of u64 to usize conversions in this file, +// so any file larger than 4GB, or an untrusted file with bad data may crash. +#![allow(clippy::cast_possible_truncation)] + #[cfg(feature = "mmap-async-tokio")] use std::path::Path; @@ -9,7 +13,10 @@ use reqwest::{Client, IntoUrl}; #[cfg(any(feature = "http-async", feature = "mmap-async-tokio"))] use tokio::io::AsyncReadExt; -use crate::directory::{Directory, Entry}; +use crate::cache::DirCacheResult; +#[cfg(any(feature = "http-async", feature = "mmap-async-tokio"))] +use crate::cache::{DirectoryCache, NoCache}; +use crate::directory::{DirEntry, Directory}; use crate::error::PmtError; use crate::header::{HEADER_SIZE, MAX_INITIAL_BYTES}; #[cfg(feature = "http-async")] @@ -19,17 +26,27 @@ use crate::mmap::MmapBackend; use crate::tile::tile_id; use crate::{Compression, Header}; -pub struct AsyncPmTilesReader { +pub struct AsyncPmTilesReader { backend: B, + cache: C, header: Header, root_directory: Directory, } -impl AsyncPmTilesReader { - /// Creates a new reader from a specified source and validates the provided PMTiles archive is valid. +impl AsyncPmTilesReader { + /// Creates a new reader from a specified source and validates the provided `PMTiles` archive is valid. /// - /// Note: Prefer using new_with_* methods. + /// Note: Prefer using `new_with_*` methods. pub async fn try_from_source(backend: B) -> Result { + Self::try_from_cached_source(backend, NoCache).await + } +} + +impl AsyncPmTilesReader { + /// Creates a new cached reader from a specified source and validates the provided `PMTiles` archive is valid. + /// + /// Note: Prefer using `new_with_*` methods. + pub async fn try_from_cached_source(backend: B, cache: C) -> Result { // Read the first 127 and up to 16,384 bytes to ensure we can initialize the header and root directory. let mut initial_bytes = backend.read(0, MAX_INITIAL_BYTES).await?; if initial_bytes.len() < HEADER_SIZE { @@ -47,6 +64,7 @@ impl AsyncPmTilesReader { Ok(Self { backend, + cache, header, root_directory, }) @@ -130,7 +148,7 @@ impl AsyncPmTilesReader { } /// Recursively locates a tile in the archive. - async fn find_tile_entry(&self, tile_id: u64) -> Option { + async fn find_tile_entry(&self, tile_id: u64) -> Option { let entry = self.root_directory.find_tile_id(tile_id); if let Some(entry) = entry { if entry.is_leaf() { @@ -141,15 +159,25 @@ impl AsyncPmTilesReader { } #[async_recursion] - async fn find_entry_rec(&self, tile_id: u64, entry: &Entry, depth: u8) -> Option { + async fn find_entry_rec(&self, tile_id: u64, entry: &DirEntry, depth: u8) -> Option { // the recursion is done as two functions because it is a bit cleaner, // and it allows directory to be cached later without cloning it first. let offset = (self.header.leaf_offset + entry.offset) as _; - let length = entry.length as _; - let dir = self.read_directory(offset, length).await.ok()?; - let entry = dir.find_tile_id(tile_id); - if let Some(entry) = entry { + let entry = match self.cache.get_dir_entry(offset, tile_id).await { + DirCacheResult::NotCached => { + // Cache miss - read from backend + let length = entry.length as _; + let dir = self.read_directory(offset, length).await.ok()?; + let entry = dir.find_tile_id(tile_id).cloned(); + self.cache.insert_dir(offset, dir).await; + entry + } + DirCacheResult::NotFound => None, + DirCacheResult::Found(entry) => Some(entry), + }; + + if let Some(ref entry) = entry { if entry.is_leaf() { return if depth <= 4 { self.find_entry_rec(tile_id, entry, depth + 1).await @@ -159,7 +187,7 @@ impl AsyncPmTilesReader { } } - entry.cloned() + entry } async fn read_directory(&self, offset: usize, length: usize) -> Result { @@ -191,26 +219,50 @@ impl AsyncPmTilesReader { } #[cfg(feature = "http-async")] -impl AsyncPmTilesReader { - /// Creates a new PMTiles reader from a URL using the Reqwest backend. +impl AsyncPmTilesReader { + /// Creates a new `PMTiles` reader from a URL using the Reqwest backend. /// /// Fails if [url] does not exist or is an invalid archive. (Note: HTTP requests are made to validate it.) pub async fn new_with_url(client: Client, url: U) -> Result { + Self::new_with_cached_url(NoCache, client, url).await + } +} + +#[cfg(feature = "http-async")] +impl AsyncPmTilesReader { + /// Creates a new `PMTiles` reader with cache from a URL using the Reqwest backend. + /// + /// Fails if [url] does not exist or is an invalid archive. (Note: HTTP requests are made to validate it.) + pub async fn new_with_cached_url( + cache: C, + client: Client, + url: U, + ) -> Result { let backend = HttpBackend::try_from(client, url)?; - Self::try_from_source(backend).await + Self::try_from_cached_source(backend, cache).await } } #[cfg(feature = "mmap-async-tokio")] -impl AsyncPmTilesReader { - /// Creates a new PMTiles reader from a file path using the async mmap backend. +impl AsyncPmTilesReader { + /// Creates a new `PMTiles` reader from a file path using the async mmap backend. /// /// Fails if [p] does not exist or is an invalid archive. pub async fn new_with_path>(path: P) -> Result { + Self::new_with_cached_path(NoCache, path).await + } +} + +#[cfg(feature = "mmap-async-tokio")] +impl AsyncPmTilesReader { + /// Creates a new cached `PMTiles` reader from a file path using the async mmap backend. + /// + /// Fails if [p] does not exist or is an invalid archive. + pub async fn new_with_cached_path>(cache: C, path: P) -> Result { let backend = MmapBackend::try_from(path).await?; - Self::try_from_source(backend).await + Self::try_from_cached_source(backend, cache).await } } diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..bd9a844 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,65 @@ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use async_trait::async_trait; + +use crate::directory::{DirEntry, Directory}; + +pub enum DirCacheResult { + NotCached, + NotFound, + Found(DirEntry), +} + +impl From> for DirCacheResult { + fn from(entry: Option<&DirEntry>) -> Self { + match entry { + Some(entry) => DirCacheResult::Found(entry.clone()), + None => DirCacheResult::NotFound, + } + } +} + +/// A cache for PMTiles directories. +#[async_trait] +pub trait DirectoryCache { + /// Get a directory from the cache, using the offset as a key. + async fn get_dir_entry(&self, offset: usize, tile_id: u64) -> DirCacheResult; + + /// Insert a directory into the cache, using the offset as a key. + /// Note that cache must be internally mutable. + async fn insert_dir(&self, offset: usize, directory: Directory); +} + +pub struct NoCache; + +#[async_trait] +impl DirectoryCache for NoCache { + #[inline] + async fn get_dir_entry(&self, _offset: usize, _tile_id: u64) -> DirCacheResult { + DirCacheResult::NotCached + } + + #[inline] + async fn insert_dir(&self, _offset: usize, _directory: Directory) {} +} + +/// A simple HashMap-based implementation of a `PMTiles` directory cache. +#[derive(Default)] +pub struct HashMapCache { + pub cache: Arc>>, +} + +#[async_trait] +impl DirectoryCache for HashMapCache { + async fn get_dir_entry(&self, offset: usize, tile_id: u64) -> DirCacheResult { + if let Some(dir) = self.cache.read().unwrap().get(&offset) { + return dir.find_tile_id(tile_id).into(); + } + DirCacheResult::NotCached + } + + async fn insert_dir(&self, offset: usize, directory: Directory) { + self.cache.write().unwrap().insert(offset, directory); + } +} diff --git a/src/directory.rs b/src/directory.rs index ad8a36f..b985ec3 100644 --- a/src/directory.rs +++ b/src/directory.rs @@ -5,8 +5,9 @@ use varint_rs::VarintReader; use crate::error::PmtError; -pub(crate) struct Directory { - entries: Vec, +#[derive(Clone)] +pub struct Directory { + entries: Vec, } impl Debug for Directory { @@ -17,7 +18,8 @@ impl Debug for Directory { impl Directory { #[cfg(any(feature = "http-async", feature = "mmap-async-tokio"))] - pub fn find_tile_id(&self, tile_id: u64) -> Option<&Entry> { + #[must_use] + pub fn find_tile_id(&self, tile_id: u64) -> Option<&DirEntry> { match self.entries.binary_search_by(|e| e.tile_id.cmp(&tile_id)) { Ok(idx) => self.entries.get(idx), Err(next_id) => { @@ -26,7 +28,7 @@ impl Directory { if next_id > 0 { let previous_tile = self.entries.get(next_id - 1)?; if previous_tile.is_leaf() - || tile_id - previous_tile.tile_id < previous_tile.run_length as u64 + || tile_id - previous_tile.tile_id < u64::from(previous_tile.run_length) { return Some(previous_tile); } @@ -44,32 +46,32 @@ impl TryFrom for Directory { let mut buffer = buffer.reader(); let n_entries = buffer.read_usize_varint()?; - let mut entries = vec![Entry::default(); n_entries]; + let mut entries = vec![DirEntry::default(); n_entries]; // Read tile IDs let mut next_tile_id = 0; - for entry in entries.iter_mut() { + for entry in &mut entries { next_tile_id += buffer.read_u64_varint()?; entry.tile_id = next_tile_id; } // Read Run Lengths - for entry in entries.iter_mut() { + for entry in &mut entries { entry.run_length = buffer.read_u32_varint()?; } // Read Lengths - for entry in entries.iter_mut() { + for entry in &mut entries { entry.length = buffer.read_u32_varint()?; } // Read Offsets - let mut last_entry: Option<&Entry> = None; - for entry in entries.iter_mut() { + let mut last_entry: Option<&DirEntry> = None; + for entry in &mut entries { let offset = buffer.read_u64_varint()?; entry.offset = if offset == 0 { let e = last_entry.ok_or(PmtError::InvalidEntry)?; - e.offset + e.length as u64 + e.offset + u64::from(e.length) } else { offset - 1 }; @@ -81,7 +83,7 @@ impl TryFrom for Directory { } #[derive(Clone, Default, Debug)] -pub(crate) struct Entry { +pub struct DirEntry { pub(crate) tile_id: u64, pub(crate) offset: u64, pub(crate) length: u32, @@ -89,8 +91,8 @@ pub(crate) struct Entry { } #[cfg(any(feature = "http-async", feature = "mmap-async-tokio"))] -impl Entry { - pub fn is_leaf(&self) -> bool { +impl DirEntry { + pub(crate) fn is_leaf(&self) -> bool { self.run_length == 0 } } @@ -115,7 +117,7 @@ mod tests { reader.read_exact(header_bytes.as_mut()).unwrap(); let header = Header::try_from_bytes(header_bytes.freeze()).unwrap(); - let mut directory_bytes = BytesMut::zeroed(header.root_length as usize); + let mut directory_bytes = BytesMut::zeroed(usize::try_from(header.root_length).unwrap()); reader.read_exact(directory_bytes.as_mut()).unwrap(); let mut decompressed = BytesMut::zeroed(directory_bytes.len() * 2); @@ -135,7 +137,7 @@ mod tests { // ...it breaks pattern on the 59th tile assert_eq!(directory.entries[58].tile_id, 58); assert_eq!(directory.entries[58].run_length, 2); - assert_eq!(directory.entries[58].offset, 422070); + assert_eq!(directory.entries[58].offset, 422_070); assert_eq!(directory.entries[58].length, 850); } } diff --git a/src/header.rs b/src/header.rs index 2f704f5..9db9df8 100644 --- a/src/header.rs +++ b/src/header.rs @@ -49,6 +49,7 @@ pub enum Compression { } impl Compression { + #[must_use] pub fn content_encoding(&self) -> Option<&'static str> { Some(match self { Compression::Gzip => "gzip", @@ -75,6 +76,7 @@ impl TryInto for u8 { #[cfg(feature = "tilejson")] impl Header { + #[must_use] pub fn get_tilejson(&self, sources: Vec) -> tilejson::TileJSON { tilejson::tilejson! { tiles: sources, @@ -85,19 +87,21 @@ impl Header { } } + #[must_use] pub fn get_bounds(&self) -> tilejson::Bounds { tilejson::Bounds::new( - self.min_longitude as f64, - self.min_latitude as f64, - self.max_longitude as f64, - self.max_latitude as f64, + f64::from(self.min_longitude), + f64::from(self.min_latitude), + f64::from(self.max_longitude), + f64::from(self.max_latitude), ) } + #[must_use] pub fn get_center(&self) -> tilejson::Center { tilejson::Center::new( - self.center_longitude as f64, - self.center_latitude as f64, + f64::from(self.center_longitude), + f64::from(self.center_latitude), self.center_zoom, ) } @@ -113,6 +117,7 @@ pub enum TileType { } impl TileType { + #[must_use] pub fn content_type(&self) -> &'static str { match self { TileType::Mvt => "application/vnd.mapbox-vector-tile", @@ -143,7 +148,9 @@ static V3_MAGIC: &str = "PMTiles"; static V2_MAGIC: &str = "PM"; impl Header { + #[allow(clippy::cast_precision_loss)] fn read_coordinate_part(mut buf: B) -> f32 { + // TODO: would it be more precise to do `((value as f64) / 10_000_000.) as f32` ? buf.get_i32_le() as f32 / 10_000_000. } @@ -195,6 +202,7 @@ impl Header { #[cfg(test)] mod tests { + #![allow(clippy::unreadable_literal, clippy::float_cmp)] use std::fs::File; use std::io::Read; use std::num::NonZeroU64; diff --git a/src/lib.rs b/src/lib.rs index 43c8476..82909b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,18 @@ #![forbid(unsafe_code)] +mod tile; + +mod header; pub use crate::header::{Compression, Header, TileType}; mod directory; +pub use directory::{DirEntry, Directory}; mod error; pub use error::PmtError; #[cfg(feature = "http-async")] pub use error::PmtHttpError; -mod header; - #[cfg(feature = "http-async")] pub mod http; @@ -19,7 +21,9 @@ pub mod mmap; #[cfg(any(feature = "http-async", feature = "mmap-async-tokio"))] pub mod async_reader; -pub mod tile; + +#[cfg(any(feature = "http-async", feature = "mmap-async-tokio"))] +pub mod cache; #[cfg(test)] mod tests { diff --git a/src/tile.rs b/src/tile.rs index 0d7005c..c55d2a6 100644 --- a/src/tile.rs +++ b/src/tile.rs @@ -4,7 +4,8 @@ pub(crate) fn tile_id(z: u8, x: u64, y: u64) -> u64 { return 0; } - let base_id: u64 = 1 + (1..z).map(|i| 4u64.pow(i as u32)).sum::(); + // TODO: minor optimization with bit shifting + let base_id: u64 = 1 + (1..z).map(|i| 4u64.pow(u32::from(i))).sum::(); let tile_id = hilbert_2d::u64::xy2h_discrete(x, y, z.into(), hilbert_2d::Variant::Hilbert);