Skip to content

Commit f6f27ac

Browse files
committed
Fix doc placement, import path, extract helper, defer selectivity computation
- Fix expression_analyzer_registry doc comment misplaced between function_factory's doc comment and field declaration - Fix module doc example import path (physical_plan -> physical_expr) - Extract expression_analyzer_registry() helper in planner to avoid repeating the config check 4 times - Defer left_sel/right_sel computation to AND/OR arms only, avoiding unnecessary sub-expression selectivity estimation for comparison operators
1 parent 011f441 commit f6f27ac

File tree

4 files changed

+54
-44
lines changed

4 files changed

+54
-44
lines changed

datafusion/core/src/execution/session_state.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,12 @@ pub struct SessionState {
186186
table_factories: HashMap<String, Arc<dyn TableProviderFactory>>,
187187
/// Runtime environment
188188
runtime_env: Arc<RuntimeEnv>,
189+
/// Registry for expression-level statistics analyzers (NDV, selectivity, etc.)
190+
expression_analyzer_registry: Arc<ExpressionAnalyzerRegistry>,
189191
/// [FunctionFactory] to support pluggable user defined function handler.
190192
///
191193
/// It will be invoked on `CREATE FUNCTION` statements.
192194
/// thus, changing dialect o PostgreSql is required
193-
/// Registry for expression-level statistics analyzers (NDV, selectivity, etc.)
194-
expression_analyzer_registry: Arc<ExpressionAnalyzerRegistry>,
195195
function_factory: Option<Arc<dyn FunctionFactory>>,
196196
cache_factory: Option<Arc<dyn CacheFactory>>,
197197
/// Cache logical plans of prepared statements for later execution.

datafusion/core/src/physical_planner.rs

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ use crate::physical_plan::{
5656
WindowExpr, displayable, windows,
5757
};
5858
use crate::schema_equivalence::schema_satisfied_by;
59+
use datafusion_physical_expr::expression_analyzer::ExpressionAnalyzerRegistry;
5960

6061
use arrow::array::{RecordBatch, builder::StringBuilder};
6162
use arrow::compute::SortOptions;
@@ -1110,17 +1111,11 @@ impl DefaultPhysicalPlanner {
11101111
physical_input,
11111112
)
11121113
.with_batch_size(session_state.config().batch_size());
1113-
let builder = if session_state
1114-
.config_options()
1115-
.optimizer
1116-
.enable_expression_analyzer
1117-
{
1118-
builder.with_expression_analyzer_registry(Arc::clone(
1119-
session_state.expression_analyzer_registry(),
1120-
))
1121-
} else {
1122-
builder
1123-
};
1114+
let builder =
1115+
match Self::expression_analyzer_registry(session_state) {
1116+
Some(r) => builder.with_expression_analyzer_registry(r),
1117+
None => builder,
1118+
};
11241119
builder.build()?
11251120
}
11261121
PlanAsyncExpr::Async(
@@ -1141,17 +1136,11 @@ impl DefaultPhysicalPlanner {
11411136
(0..input.schema().fields().len()).collect::<Vec<_>>(),
11421137
))?
11431138
.with_batch_size(session_state.config().batch_size());
1144-
let builder = if session_state
1145-
.config_options()
1146-
.optimizer
1147-
.enable_expression_analyzer
1148-
{
1149-
builder.with_expression_analyzer_registry(Arc::clone(
1150-
session_state.expression_analyzer_registry(),
1151-
))
1152-
} else {
1153-
builder
1154-
};
1139+
let builder =
1140+
match Self::expression_analyzer_registry(session_state) {
1141+
Some(r) => builder.with_expression_analyzer_registry(r),
1142+
None => builder,
1143+
};
11551144
builder.build()?
11561145
}
11571146
_ => {
@@ -2861,6 +2850,17 @@ impl DefaultPhysicalPlanner {
28612850
Ok(mem_exec)
28622851
}
28632852

2853+
/// Returns the expression analyzer registry if the config option is enabled.
2854+
fn expression_analyzer_registry(
2855+
session_state: &SessionState,
2856+
) -> Option<Arc<ExpressionAnalyzerRegistry>> {
2857+
session_state
2858+
.config_options()
2859+
.optimizer
2860+
.enable_expression_analyzer
2861+
.then(|| Arc::clone(session_state.expression_analyzer_registry()))
2862+
}
2863+
28642864
fn create_project_physical_exec(
28652865
&self,
28662866
session_state: &SessionState,
@@ -2921,14 +2921,8 @@ impl DefaultPhysicalPlanner {
29212921
.map(|(expr, alias)| ProjectionExpr { expr, alias })
29222922
.collect();
29232923
let mut proj_exec = ProjectionExec::try_new(proj_exprs, input_exec)?;
2924-
if session_state
2925-
.config_options()
2926-
.optimizer
2927-
.enable_expression_analyzer
2928-
{
2929-
proj_exec = proj_exec.with_expression_analyzer_registry(Arc::clone(
2930-
session_state.expression_analyzer_registry(),
2931-
));
2924+
if let Some(r) = Self::expression_analyzer_registry(session_state) {
2925+
proj_exec = proj_exec.with_expression_analyzer_registry(r);
29322926
}
29332927
Ok(Arc::new(proj_exec))
29342928
}

datafusion/physical-expr/src/expression_analyzer/default.rs

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,34 @@ impl ExpressionAnalyzer for DefaultExpressionAnalyzer {
7272
) -> AnalysisResult<f64> {
7373
// Binary expressions: AND, OR, comparisons
7474
if let Some(binary) = expr.as_any().downcast_ref::<BinaryExpr>() {
75-
let left_sel =
76-
self.estimate_selectivity_recursive(binary.left(), input_stats, registry);
77-
let right_sel = self.estimate_selectivity_recursive(
78-
binary.right(),
79-
input_stats,
80-
registry,
81-
);
82-
8375
let sel = match binary.op() {
84-
// Logical operators
85-
Operator::And => left_sel * right_sel,
86-
Operator::Or => left_sel + right_sel - (left_sel * right_sel),
76+
// Logical operators: need child selectivities
77+
Operator::And => {
78+
let left_sel = self.estimate_selectivity_recursive(
79+
binary.left(),
80+
input_stats,
81+
registry,
82+
);
83+
let right_sel = self.estimate_selectivity_recursive(
84+
binary.right(),
85+
input_stats,
86+
registry,
87+
);
88+
left_sel * right_sel
89+
}
90+
Operator::Or => {
91+
let left_sel = self.estimate_selectivity_recursive(
92+
binary.left(),
93+
input_stats,
94+
registry,
95+
);
96+
let right_sel = self.estimate_selectivity_recursive(
97+
binary.right(),
98+
input_stats,
99+
registry,
100+
);
101+
left_sel + right_sel - (left_sel * right_sel)
102+
}
87103

88104
// Equality: selectivity = 1/NDV
89105
Operator::Eq => {

datafusion/physical-expr/src/expression_analyzer/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
//! # Example
4141
//!
4242
//! ```ignore
43-
//! use datafusion_physical_plan::expression_analyzer::*;
43+
//! use datafusion_physical_expr::expression_analyzer::*;
4444
//!
4545
//! // Create registry with default analyzer
4646
//! let mut registry = ExpressionAnalyzerRegistry::new();

0 commit comments

Comments
 (0)