Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions be/src/exec/operator/olap_scan_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,15 @@ Status OlapScanLocalState::_init_profile() {
ADD_COUNTER_WITH_LEVEL(_segment_profile, "RowsInvertedIndexFiltered", TUnit::UNIT, 1);
_inverted_index_filter_timer =
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexFilterTime", 1);
// Sub-parts of InvertedIndexFilterTime (sum should ≈ parent). These diagnose
// where the filter-block wall time is actually spent when InvertedIndexFilterTime
// and InvertedIndexQueryTime do not match.
_inverted_index_apply_col_pred_timer = ADD_CHILD_TIMER_WITH_LEVEL(
_segment_profile, "InvertedIndexApplyColPredTime", "InvertedIndexFilterTime", 1);
_inverted_index_apply_expr_timer = ADD_CHILD_TIMER_WITH_LEVEL(
_segment_profile, "InvertedIndexApplyExprTime", "InvertedIndexFilterTime", 1);
_inverted_index_post_filter_timer = ADD_CHILD_TIMER_WITH_LEVEL(
_segment_profile, "InvertedIndexPostFilterTime", "InvertedIndexFilterTime", 1);
_inverted_index_query_cache_hit_counter =
ADD_COUNTER_WITH_LEVEL(_segment_profile, "InvertedIndexQueryCacheHit", TUnit::UNIT, 1);
_inverted_index_query_cache_miss_counter =
Expand Down
6 changes: 6 additions & 0 deletions be/src/exec/operator/olap_scan_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,12 @@ class OlapScanLocalState final : public ScanLocalState<OlapScanLocalState> {
RuntimeProfile::Counter* _statistics_collect_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_filter_counter = nullptr;
RuntimeProfile::Counter* _inverted_index_filter_timer = nullptr;
// Sub-parts of _inverted_index_filter_timer for diagnosing where the
// inverted-index filter-block wall time is spent (apply_col_pred +
// apply_expr + post_filter should sum to the parent FilterTime).
RuntimeProfile::Counter* _inverted_index_apply_col_pred_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_apply_expr_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_post_filter_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_null_bitmap_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_cache_hit_counter = nullptr;
RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter = nullptr;
Expand Down
6 changes: 6 additions & 0 deletions be/src/exec/scan/olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,12 @@ void OlapScanner::_collect_profile_before_close() {
COUNTER_UPDATE(local_state->_cached_pages_num_counter, stats.cached_pages_num);
COUNTER_UPDATE(local_state->_inverted_index_filter_counter, stats.rows_inverted_index_filtered);
COUNTER_UPDATE(local_state->_inverted_index_filter_timer, stats.inverted_index_filter_timer);
COUNTER_UPDATE(local_state->_inverted_index_apply_col_pred_timer,
stats.inverted_index_apply_col_pred_timer);
COUNTER_UPDATE(local_state->_inverted_index_apply_expr_timer,
stats.inverted_index_apply_expr_timer);
COUNTER_UPDATE(local_state->_inverted_index_post_filter_timer,
stats.inverted_index_post_filter_timer);
COUNTER_UPDATE(local_state->_inverted_index_query_cache_hit_counter,
stats.inverted_index_query_cache_hit);
COUNTER_UPDATE(local_state->_inverted_index_query_cache_miss_counter,
Expand Down
141 changes: 92 additions & 49 deletions be/src/exprs/function/function_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,19 +606,24 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
minimum_should_match = search_param.minimum_should_match;
}

auto* stats = context->stats;
int64_t dummy_timer = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_timer : &dummy_timer);

// ---- Phase A: Build the query tree (OUTSIDE search_timer scope). --------
// `build_query_recursive` walks the DSL and, at every leaf, calls
// `FieldReaderResolver::resolve(...)` which in turn opens the CLucene
// directory / builds the IndexSearcher. That opening cost is already
// tracked by `inverted_index_searcher_open_timer` inside the resolver.
//
// Keep this OUTSIDE `inverted_index_searcher_search_timer` so that
// OpenTime becomes a SIBLING of SearchTime under QueryTime, mirroring the
// MATCH path where `handle_searcher_cache` (OpenTime) and
// `match_index_search` (SearchTime) are both direct children of QueryTime
// in `FullTextIndexReader::query`. Previously OpenTime was nested inside
// SearchInitTime → SearchTime, which broke the invariant
// QueryTime ≈ OpenTime + SearchTime.
query_v2::QueryPtr root_query;
std::string root_binding_key;
{
int64_t init_dummy = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_init_timer : &init_dummy);
RETURN_IF_ERROR(build_query_recursive(search_param.root, context, resolver, &root_query,
&root_binding_key, default_operator,
minimum_should_match));
}
RETURN_IF_ERROR(build_query_recursive(search_param.root, context, resolver, &root_query,
&root_binding_key, default_operator,
minimum_should_match));
if (root_query == nullptr) {
LOG(INFO) << "search: Query tree resolved to empty query, dsl:"
<< search_param.original_dsl;
Expand All @@ -640,55 +645,93 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
top_k = index_query_context->query_limit;
}

auto weight = root_query->weight(enable_scoring);
if (!weight) {
LOG(WARNING) << "search: Failed to build query weight";
bitmap_result = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
std::make_shared<roaring::Roaring>());
return Status::OK();
}

// ---- Phase B: Execute query (INSIDE search_timer scope). ----------------
// SearchTime is the analog of MATCH path's `match_index_search`:
// SearchInitTime – prepare the executable query structure
// (`query->add(query_info)` in MATCH;
// `Query::weight(...)` here — creating the Weight
// from the query tree)
// SearchExecTime – iterate the scorer and extract the null bitmap. The
// actual `weight->scorer(...)` call that opens the
// per-term posting-list iterators lives inside
// `collect_multi_segment_*`, which is inside this
// scope.
// Together they should cover nearly all of SearchTime, restoring the
// invariant `SearchTime ≈ SearchInitTime + SearchExecTime`. Result-bitmap
// construction and DSL cache insertion happen OUTSIDE SearchTime (still
// inside QueryTime), mirroring the MATCH path where `match_index_search`
// returns first and only then the caller does `cache->insert(...)`.
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
{
int64_t exec_dummy = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_exec_timer : &exec_dummy);
if (enable_scoring && !is_asc && top_k > 0) {
bool use_wand = index_query_context->runtime_state != nullptr &&
index_query_context->runtime_state->query_options()
.enable_inverted_index_wand_query;
query_v2::collect_multi_segment_top_k(
weight, exec_ctx, root_binding_key, top_k, roaring,
index_query_context->collection_similarity, use_wand);
} else {
query_v2::collect_multi_segment_doc_set(
weight, exec_ctx, root_binding_key, roaring,
index_query_context ? index_query_context->collection_similarity : nullptr,
enable_scoring);
auto* stats = context->stats;
int64_t dummy_timer = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_timer : &dummy_timer);

query_v2::WeightPtr weight;
{
int64_t init_dummy = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_init_timer
: &init_dummy);
weight = root_query->weight(enable_scoring);
if (!weight) {
LOG(WARNING) << "search: Failed to build query weight";
bitmap_result = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
std::make_shared<roaring::Roaring>());
return Status::OK();
}
}
}

VLOG_DEBUG << "search: Query completed, matched " << roaring->cardinality() << " documents";
{
int64_t exec_dummy = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_exec_timer
: &exec_dummy);
if (enable_scoring && !is_asc && top_k > 0) {
bool use_wand = index_query_context->runtime_state != nullptr &&
index_query_context->runtime_state->query_options()
.enable_inverted_index_wand_query;
query_v2::collect_multi_segment_top_k(
weight, exec_ctx, root_binding_key, top_k, roaring,
index_query_context->collection_similarity, use_wand);
} else {
query_v2::collect_multi_segment_doc_set(
weight, exec_ctx, root_binding_key, roaring,
index_query_context ? index_query_context->collection_similarity : nullptr,
enable_scoring);
}

// Extract NULL bitmap from three-valued logic scorer
// The scorer correctly computes which documents evaluate to NULL based on query logic
// For example: TRUE OR NULL = TRUE (not NULL), FALSE OR NULL = NULL
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
if (exec_ctx.null_resolver) {
auto scorer = weight->scorer(exec_ctx, root_binding_key);
if (scorer && scorer->has_null_bitmap(exec_ctx.null_resolver)) {
const auto* bitmap = scorer->get_null_bitmap(exec_ctx.null_resolver);
if (bitmap != nullptr) {
*null_bitmap = *bitmap;
VLOG_TRACE << "search: Extracted NULL bitmap with " << null_bitmap->cardinality()
<< " NULL documents";
VLOG_DEBUG << "search: Query completed, matched " << roaring->cardinality()
<< " documents";

// Extract NULL bitmap from three-valued logic scorer. Keep inside
// SearchExecTime so the invariant
// SearchTime ≈ SearchInitTime + SearchExecTime
// holds. `inverted_index_query_null_bitmap_timer` still reports its
// own slice as a separate sibling of QueryTime; the small overlap
// between those two timers is intentional and mirrors the MATCH
// path (where null-bitmap read happens inside handle_searcher_cache
// which is already covered by OpenTime).
if (exec_ctx.null_resolver) {
auto scorer = weight->scorer(exec_ctx, root_binding_key);
if (scorer && scorer->has_null_bitmap(exec_ctx.null_resolver)) {
const auto* bitmap = scorer->get_null_bitmap(exec_ctx.null_resolver);
if (bitmap != nullptr) {
*null_bitmap = *bitmap;
VLOG_TRACE << "search: Extracted NULL bitmap with "
<< null_bitmap->cardinality() << " NULL documents";
}
}
}
}
}
} // SearchTime scope ends here.

VLOG_TRACE << "search: Before mask - true_bitmap=" << roaring->cardinality()
<< ", null_bitmap=" << null_bitmap->cardinality();

// Create result and mask out NULLs (SQL WHERE clause semantics: only TRUE rows)
// Create result and mask out NULLs (SQL WHERE clause semantics: only TRUE
// rows). Done AFTER SearchTime so SearchTime measures only the work that
// MATCH-path `match_index_search` performs; result-object construction
// happens in the caller slot, still inside QueryTime.
bitmap_result = InvertedIndexResultBitmap(std::move(roaring), std::move(null_bitmap));
bitmap_result.mask_out_null();

Expand Down
24 changes: 21 additions & 3 deletions be/src/exprs/vsearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,19 @@ Status VSearchExpr::execute_column(VExprContext* context, const Block* block, Se
}

Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
// NOTE: this function runs once per segment. Keep the hot path quiet —
// LOG(INFO) / LOG(WARNING) at this level can add tens of seconds of
// cumulative wall-clock inside InvertedIndexFilterTime on busy scans with
// many segments, making the filter-time dwarf the inner
// InvertedIndexQueryTime. Prefer VLOG_DEBUG except for real failures
// returned up the stack.
if (_search_param.original_dsl.empty()) {
return Status::InvalidArgument("search DSL is empty");
}

auto index_context = context->get_index_context();
if (!index_context) {
LOG(WARNING) << "VSearchExpr: No inverted index context available";
VLOG_DEBUG << "VSearchExpr: No inverted index context available";
return Status::OK();
}

Expand All @@ -250,8 +256,8 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm

const bool is_nested_query = _search_param.root.clause_type == "NESTED";
if (bundle.iterators.empty() && !is_nested_query) {
LOG(WARNING) << "VSearchExpr: No indexed columns available for evaluation, DSL: "
<< _original_dsl;
VLOG_DEBUG << "VSearchExpr: No indexed columns available for evaluation, DSL: "
<< _original_dsl;
auto empty_bitmap = InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
std::make_shared<roaring::Roaring>());
index_context->set_index_result_for_expr(this, std::move(empty_bitmap));
Expand All @@ -272,6 +278,18 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t segm
return status;
}

// IMPORTANT: do NOT std::move `result_bitmap` here. The DSL result cache
// (InvertedIndexQueryCache) already holds a shared_ptr to the same
// underlying roaring::Roaring as `result_bitmap._data_bitmap`. Storing a
// moved-from copy in the per-segment expr context would make the per-
// segment map entry SHARE that same Roaring object, and any later
// mutating consumer (notably `VCompoundPred::COMPOUND_NOT` →
// `InvertedIndexResultBitmap::op_not()`, which is `const` but writes
// through the shared_ptr) would corrupt the DSL cache entry, so the
// *next* segment that gets a cache hit would observe the negated data
// instead of the original SEARCH result. Pass by lvalue so the copy
// constructor deep-copies the bitmap and isolates the cache from
// per-segment mutations.
index_context->set_index_result_for_expr(this, result_bitmap);
for (int column_id : bundle.column_ids) {
index_context->set_true_for_index_status(this, column_id);
Expand Down
7 changes: 7 additions & 0 deletions be/src/storage/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,13 @@ struct OlapReaderStatistics {

int64_t rows_inverted_index_filtered = 0;
int64_t inverted_index_filter_timer = 0;
// Sub-parts of inverted_index_filter_timer, used to diagnose the gap between
// inverted_index_filter_timer and the inner InvertedIndexQueryTime.
// inverted_index_filter_timer ≈ apply_col_pred + apply_expr + post_filter_bitmap_op
// apply_expr ≈ inverted_index_query_timer + small VSearchExpr / VExpr overhead
int64_t inverted_index_apply_col_pred_timer = 0;
int64_t inverted_index_apply_expr_timer = 0;
int64_t inverted_index_post_filter_timer = 0;
int64_t inverted_index_query_timer = 0;
int64_t inverted_index_query_cache_hit = 0;
int64_t inverted_index_query_cache_miss = 0;
Expand Down
Loading
Loading