Skip to content

Commit ce9d3f0

Browse files
zhongzcevenyag
authored andcommitted
fix(bloom-filter): skip applying for non-indexed columns (GreptimeTeam#5246)
Signed-off-by: Zhenchi <[email protected]>
1 parent a3e212d commit ce9d3f0

File tree

4 files changed

+67
-31
lines changed

4 files changed

+67
-31
lines changed

src/mito2/src/cache/index/bloom_filter_index.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ fn bloom_filter_index_content_weight((k, _): &((FileId, ColumnId), PageKey), v:
6161
pub struct CachedBloomFilterIndexBlobReader<R> {
6262
file_id: FileId,
6363
column_id: ColumnId,
64-
file_size: u64,
64+
blob_size: u64,
6565
inner: R,
6666
cache: BloomFilterIndexCacheRef,
6767
}
@@ -71,14 +71,14 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
7171
pub fn new(
7272
file_id: FileId,
7373
column_id: ColumnId,
74-
file_size: u64,
74+
blob_size: u64,
7575
inner: R,
7676
cache: BloomFilterIndexCacheRef,
7777
) -> Self {
7878
Self {
7979
file_id,
8080
column_id,
81-
file_size,
81+
blob_size,
8282
inner,
8383
cache,
8484
}
@@ -92,7 +92,7 @@ impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBl
9292
self.cache
9393
.get_or_load(
9494
(self.file_id, self.column_id),
95-
self.file_size,
95+
self.blob_size,
9696
offset,
9797
size,
9898
move |ranges| async move { inner.read_vec(&ranges).await },

src/mito2/src/cache/index/inverted_index.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,17 @@ fn inverted_index_content_weight((k, _): &(FileId, PageKey), v: &Bytes) -> u32 {
5858
/// Inverted index blob reader with cache.
5959
pub struct CachedInvertedIndexBlobReader<R> {
6060
file_id: FileId,
61-
file_size: u64,
61+
blob_size: u64,
6262
inner: R,
6363
cache: InvertedIndexCacheRef,
6464
}
6565

6666
impl<R> CachedInvertedIndexBlobReader<R> {
6767
/// Creates a new inverted index blob reader with cache.
68-
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
68+
pub fn new(file_id: FileId, blob_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
6969
Self {
7070
file_id,
71-
file_size,
71+
blob_size,
7272
inner,
7373
cache,
7474
}
@@ -82,7 +82,7 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
8282
self.cache
8383
.get_or_load(
8484
self.file_id,
85-
self.file_size,
85+
self.blob_size,
8686
offset,
8787
size,
8888
move |ranges| async move { inner.read_vec(&ranges).await },

src/mito2/src/sst/index/bloom_filter/applier.rs

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,18 @@ use snafu::{OptionExt, ResultExt};
3333
use store_api::metadata::RegionMetadata;
3434
use store_api::storage::{ColumnId, RegionId};
3535

36-
use super::INDEX_BLOB_TYPE;
3736
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
3837
use crate::cache::index::bloom_filter_index::{
3938
BloomFilterIndexCacheRef, CachedBloomFilterIndexBlobReader,
4039
};
4140
use crate::error::{
42-
ApplyBloomFilterIndexSnafu, ColumnNotFoundSnafu, ConvertValueSnafu, MetadataSnafu,
41+
ApplyBloomFilterIndexSnafu, ColumnNotFoundSnafu, ConvertValueSnafu, Error, MetadataSnafu,
4342
PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result,
4443
};
4544
use crate::metrics::INDEX_APPLY_ELAPSED;
4645
use crate::row_converter::SortField;
4746
use crate::sst::file::FileId;
47+
use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE;
4848
use crate::sst::index::codec::IndexValueCodec;
4949
use crate::sst::index::puffin_manager::{BlobReader, PuffinManagerFactory};
5050
use crate::sst::index::TYPE_BLOOM_FILTER_INDEX;
@@ -118,28 +118,21 @@ impl BloomFilterIndexApplier {
118118
.start_timer();
119119

120120
for (column_id, predicates) in &self.filters {
121-
let mut blob = match self.cached_blob_reader(file_id, *column_id).await {
122-
Ok(Some(puffin_reader)) => puffin_reader,
123-
other => {
124-
if let Err(err) = other {
125-
warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
126-
}
127-
self.remote_blob_reader(file_id, *column_id, file_size_hint)
128-
.await?
129-
}
121+
let mut blob = match self
122+
.blob_reader(file_id, *column_id, file_size_hint)
123+
.await?
124+
{
125+
Some(blob) => blob,
126+
None => continue,
130127
};
131128

132129
// Create appropriate reader based on whether we have caching enabled
133130
if let Some(bloom_filter_cache) = &self.bloom_filter_index_cache {
134-
let file_size = if let Some(file_size) = file_size_hint {
135-
file_size
136-
} else {
137-
blob.metadata().await.context(MetadataSnafu)?.content_length
138-
};
131+
let blob_size = blob.metadata().await.context(MetadataSnafu)?.content_length;
139132
let reader = CachedBloomFilterIndexBlobReader::new(
140133
file_id,
141134
*column_id,
142-
file_size,
135+
blob_size,
143136
BloomFilterReaderImpl::new(blob),
144137
bloom_filter_cache.clone(),
145138
);
@@ -157,6 +150,43 @@ impl BloomFilterIndexApplier {
157150
Ok(())
158151
}
159152

153+
/// Creates a blob reader from the cached or remote index file.
154+
///
155+
/// Returus `None` if the column does not have an index.
156+
async fn blob_reader(
157+
&self,
158+
file_id: FileId,
159+
column_id: ColumnId,
160+
file_size_hint: Option<u64>,
161+
) -> Result<Option<BlobReader>> {
162+
let reader = match self.cached_blob_reader(file_id, column_id).await {
163+
Ok(Some(puffin_reader)) => puffin_reader,
164+
other => {
165+
if let Err(err) = other {
166+
// Blob not found means no index for this column
167+
if is_blob_not_found(&err) {
168+
return Ok(None);
169+
}
170+
warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
171+
}
172+
let res = self
173+
.remote_blob_reader(file_id, column_id, file_size_hint)
174+
.await;
175+
if let Err(err) = res {
176+
// Blob not found means no index for this column
177+
if is_blob_not_found(&err) {
178+
return Ok(None);
179+
}
180+
return Err(err);
181+
}
182+
183+
res?
184+
}
185+
};
186+
187+
Ok(Some(reader))
188+
}
189+
160190
/// Creates a blob reader from the cached index file
161191
async fn cached_blob_reader(
162192
&self,
@@ -242,6 +272,16 @@ impl BloomFilterIndexApplier {
242272
}
243273
}
244274

275+
fn is_blob_not_found(err: &Error) -> bool {
276+
matches!(
277+
err,
278+
Error::PuffinBuildReader {
279+
source: puffin::error::Error::BlobNotFound { .. },
280+
..
281+
}
282+
)
283+
}
284+
245285
pub struct BloomFilterIndexApplierBuilder<'a> {
246286
region_dir: String,
247287
object_store: ObjectStore,

src/mito2/src/sst/index/inverted_index/applier.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,10 @@ impl InvertedIndexApplier {
137137
};
138138

139139
if let Some(index_cache) = &self.inverted_index_cache {
140-
let file_size = if let Some(file_size) = file_size_hint {
141-
file_size
142-
} else {
143-
blob.metadata().await.context(MetadataSnafu)?.content_length
144-
};
140+
let blob_size = blob.metadata().await.context(MetadataSnafu)?.content_length;
145141
let mut index_reader = CachedInvertedIndexBlobReader::new(
146142
file_id,
147-
file_size,
143+
blob_size,
148144
InvertedIndexBlobReader::new(blob),
149145
index_cache.clone(),
150146
);

0 commit comments

Comments
 (0)