From 3637a82f48664cc01fc4e621f4a8b7393b92b3e9 Mon Sep 17 00:00:00 2001 From: Kould Date: Fri, 17 Jan 2025 19:52:10 +0800 Subject: [PATCH] refactor: simplify index cover implementation --- core/schema.rs | 39 ++++++-- core/translate/delete.rs | 2 - core/translate/emitter.rs | 192 +++++++++++++++--------------------- core/translate/expr.rs | 6 +- core/translate/optimizer.rs | 73 +++++++++++--- core/translate/plan.rs | 7 +- core/translate/select.rs | 2 - core/util.rs | 6 +- core/vdbe/builder.rs | 6 +- testing/where.test | 2 +- 10 files changed, 183 insertions(+), 152 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index b69e18213..7e6f86b0e 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -458,6 +458,7 @@ pub struct Index { pub root_page: usize, pub columns: Vec, pub unique: bool, + pub index_cover_mapping: HashMap, } #[allow(dead_code)] @@ -474,7 +475,11 @@ pub enum Order { } impl Index { - pub fn from_sql(sql: &str, root_page: usize) -> Result { + pub fn from_sql( + sql: &str, + tables: &HashMap>, + root_page: usize, + ) -> Result { let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next()?; match cmd { @@ -485,24 +490,44 @@ impl Index { unique, .. })) => { + let table_name = normalize_ident(&tbl_name.0); + let Some((_, table)) = tables.iter().find(|(t_name, _)| t_name == &&table_name) + else { + crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name); + }; + let mut index_cover_mapping = HashMap::with_capacity(columns.len()); let index_name = normalize_ident(&idx_name.name.0); - let index_columns = columns - .into_iter() - .map(|col| IndexColumn { - name: normalize_ident(&col.expr.to_string()), + + let mut index_columns = Vec::with_capacity(columns.len()); + + for (i, col) in columns.into_iter().enumerate() { + let column_name = normalize_ident(&col.expr.to_string()); + // SAFETY: the column in the index must exist in the table + let (pos, _) = table.get_column(&column_name).unwrap(); + index_cover_mapping.insert(pos, i); + index_columns.push(IndexColumn { + name: column_name, order: match col.order { Some(sqlite3_parser::ast::SortOrder::Asc) => Order::Ascending, Some(sqlite3_parser::ast::SortOrder::Desc) => Order::Descending, None => Order::Ascending, }, }) - .collect(); + } + if let Some(pos) = table + .columns + .iter() + .position(|column| column.is_rowid_alias) + { + index_cover_mapping.insert(pos, index_cover_mapping.len()); + } Ok(Index { name: index_name, - table_name: normalize_ident(&tbl_name.0), + table_name, root_page, columns: index_columns, unique, + index_cover_mapping, }) } _ => todo!("Expected create index statement"), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 2c72efce0..be7dd772a 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -6,7 +6,6 @@ use crate::translate::planner::{parse_limit, parse_where}; use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result, SymbolTable}; use sqlite3_parser::ast::{Expr, Limit, QualifiedName}; -use std::collections::BTreeSet; use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; @@ -59,7 +58,6 @@ pub fn prepare_delete_plan( iter_dir: None, }, result_columns: vec![], - related_columns: BTreeSet::new(), where_clause: resolved_where_clauses, order_by: None, limit: resolved_limit, diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index cb2a4cb0e..5641dfa5b 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -2,18 +2,18 @@ // It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. use std::cell::RefCell; -use std::collections::{BTreeSet, HashMap}; +use std::collections::HashMap; use std::rc::{Rc, Weak}; use sqlite3_parser::ast::{self}; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::plan::{ColumnBinding, DeletePlan, IterationDirection, Plan, Search}; +use crate::translate::plan::{DeletePlan, IterationDirection, Plan, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; -use crate::vdbe::{insn::Insn, BranchOffset, CursorID, Program}; +use crate::vdbe::{insn::Insn, BranchOffset, Program}; use crate::{Connection, Result, SymbolTable}; use super::expr::{ @@ -378,7 +378,6 @@ fn emit_query( open_loop( program, &mut plan.source, - &plan.related_columns, &plan.referenced_tables, metadata, syms, @@ -471,7 +470,6 @@ fn emit_program_for_delete( open_loop( &mut program, &mut plan.source, - &plan.related_columns, &plan.referenced_tables, &mut metadata, syms, @@ -666,34 +664,26 @@ fn init_source( search, .. } => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(table_reference.table.clone()), - ); - - match mode { - OperationMode::SELECT => { - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.get_root_page(), - }); - program.emit_insn(Insn::OpenReadAwait {}); - } - OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.get_root_page(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - _ => { - unimplemented!() - } - } + let mut is_cover = false; - if let Search::IndexSearch { index, .. } = search { - let index_cursor_id = program - .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); + if let Search::IndexSearch { + index, + cover_mapping, + .. + } = search + { + let index_cursor_id = if cover_mapping.is_some() { + is_cover = true; + program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::Index(index.clone())), + ) + } else { + program.alloc_cursor_id( + Some(index.name.clone()), + Some(Table::Index(index.clone())), + ) + }; match mode { OperationMode::SELECT => { @@ -716,6 +706,33 @@ fn init_source( } } + if !is_cover { + let table_cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(table_reference.table.clone()), + ); + + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.get_root_page(), + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.get_root_page(), + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } + } + Ok(()) } SourceOperator::Nothing { .. } => Ok(()), @@ -728,7 +745,6 @@ fn init_source( fn open_loop( program: &mut ProgramBuilder, source: &mut SourceOperator, - related_columns: &BTreeSet, referenced_tables: &[TableReference], metadata: &mut Metadata, syms: &SymbolTable, @@ -803,14 +819,7 @@ fn open_loop( outer, .. } => { - open_loop( - program, - left, - related_columns, - referenced_tables, - metadata, - syms, - )?; + open_loop(program, left, referenced_tables, metadata, syms)?; let loop_labels = metadata .loop_labels @@ -828,14 +837,7 @@ fn open_loop( jump_target_when_false = lj_meta.check_match_flag_label; } - open_loop( - program, - right, - related_columns, - referenced_tables, - metadata, - syms, - )?; + open_loop(program, right, referenced_tables, metadata, syms)?; if let Some(predicates) = predicates { let jump_target_when_true = program.allocate_label(); @@ -938,6 +940,7 @@ fn open_loop( predicates, .. } => { + let mut is_index_cover = false; let table_cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); let loop_labels = metadata .loop_labels @@ -946,8 +949,21 @@ fn open_loop( // Open the loop for the index search. // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. if !matches!(search, Search::RowidEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. } = search { - Some(program.resolve_cursor_id(&index.name)) + let index_cursor_id = if let Search::IndexSearch { + index, + cover_mapping, + .. + } = search + { + if let Some(cover_mapping) = &cover_mapping { + program + .table_remapping_cursors + .insert(table_cursor_id, cover_mapping.clone()); + is_index_cover = true; + Some(table_cursor_id) + } else { + Some(program.resolve_cursor_id(&index.name)) + } } else { None }; @@ -1086,19 +1102,11 @@ fn open_loop( _ => {} } - if let Some(index_cursor_id) = index_cursor_id { - if !try_index_cover( - program, - related_columns, - table_reference, + if let (Some(index_cursor_id), false) = (index_cursor_id, is_index_cover) { + program.emit_insn(Insn::DeferredSeek { index_cursor_id, table_cursor_id, - ) { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); - } + }); } } @@ -1147,54 +1155,6 @@ fn open_loop( } } -fn try_index_cover( - program: &mut ProgramBuilder, - related_columns: &BTreeSet, - table_reference: &TableReference, - index_cursor_id: CursorID, - table_cursor_id: CursorID, -) -> bool { - let TableReference { - table, table_index, .. - } = table_reference; - - if let (_, Some(Table::Index(index))) = &program.cursor_ref[index_cursor_id] { - let mut index_column_map = HashMap::with_capacity(index.columns.len()); - let index_clone = Rc::clone(index); - for (i, index_column) in index_clone.columns.iter().enumerate() { - // SAFETY: the column in the index must exist in the table - let (pos, _) = table.get_column(&index_column.name).unwrap(); - index_column_map.insert(pos, i); - } - if let Some(pos) = table - .columns() - .iter() - .position(|column| column.is_rowid_alias) - { - index_column_map.insert(pos, index_column_map.len()); - } - - let lower = ColumnBinding { - table: *table_index, - column: 0, - }; - let upper = ColumnBinding { - table: table_index + 1, - column: 0, - }; - let is_cover = related_columns - .range(lower..upper) - .all(|binding| index_column_map.contains_key(&binding.column)); - if is_cover { - program - .index_cover_cursors - .insert(table_cursor_id, (index_cursor_id, index_column_map)); - return true; - } - } - false -} - /// SQLite (and so Limbo) processes joins as a nested loop. /// The inner loop may emit rows to various destinations depending on the query: /// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match) @@ -1532,7 +1492,17 @@ fn close_loop( return Ok(()); } let cursor_id = match search { - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + Search::IndexSearch { + index, + cover_mapping, + .. + } => { + if cover_mapping.is_some() { + program.resolve_cursor_id(&table_reference.table_identifier) + } else { + program.resolve_cursor_id(&index.name) + } + } Search::RowidSearch { .. } => { program.resolve_cursor_id(&table_reference.table_identifier) } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index ffec914b3..93d5ca3c9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1967,12 +1967,10 @@ pub fn translate_expr( // the table and read the column from the cursor. TableReferenceType::BTreeTable => { let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); - if let Some((index_cursor_id, columns_mapping)) = - program.index_cover_cursors.get(&cursor_id) - { + if let Some(columns_mapping) = program.table_remapping_cursors.get(&cursor_id) { // FIXME: row_id is displayed as `column` in explain program.emit_insn(Insn::Column { - cursor_id: *index_cursor_id, + cursor_id, column: columns_mapping[column], dest: target_register, }); diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 70b92b2fc..60c2c60d7 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -42,10 +42,23 @@ fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { &plan.referenced_tables, )?; + let mut related_columns = BTreeSet::new(); + collect_related_columns( + &plan.source, + &plan.result_columns, + &plan.where_clause, + &plan.group_by, + &plan.order_by, + &plan.aggregates, + &mut related_columns, + &plan.referenced_tables, + )?; + use_indexes( &mut plan.source, &plan.referenced_tables, &plan.available_indexes, + &related_columns, )?; eliminate_unnecessary_orderby( @@ -55,17 +68,6 @@ fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { &plan.available_indexes, )?; - related_columns( - &plan.source, - &plan.result_columns, - &plan.where_clause, - &plan.group_by, - &plan.order_by, - &plan.aggregates, - &mut plan.related_columns, - &plan.referenced_tables, - )?; - Ok(()) } @@ -78,10 +80,23 @@ fn optimize_delete_plan(plan: &mut DeletePlan) -> Result<()> { return Ok(()); } + let mut related_columns = BTreeSet::new(); + collect_related_columns( + &plan.source, + &plan.result_columns, + &plan.where_clause, + &None, + &plan.order_by, + &[], + &mut related_columns, + &plan.referenced_tables, + )?; + use_indexes( &mut plan.source, &plan.referenced_tables, &plan.available_indexes, + &related_columns, )?; Ok(()) @@ -175,6 +190,7 @@ fn use_indexes( operator: &mut SourceOperator, referenced_tables: &[TableReference], available_indexes: &[Rc], + related_columns: &BTreeSet, ) -> Result<()> { match operator { SourceOperator::Subquery { .. } => Ok(()), @@ -205,8 +221,31 @@ fn use_indexes( Either::Left(non_index_using_expr) => { fs[i] = non_index_using_expr; } - Either::Right(index_search) => { + Either::Right(mut index_search) => { fs.remove(i); + + if let Search::IndexSearch { + index, + cover_mapping, + .. + } = &mut index_search + { + let lower = ColumnBinding { + table: table_index, + column: 0, + }; + let upper = ColumnBinding { + table: table_index + 1, + column: 0, + }; + let is_cover = related_columns.range(lower..upper).all(|binding| { + index.index_cover_mapping.contains_key(&binding.column) + }); + if is_cover { + *cover_mapping = Some(index.index_cover_mapping.clone()); + } + } + *operator = SourceOperator::Search { id: *id, table_reference: table_reference.clone(), @@ -222,8 +261,8 @@ fn use_indexes( Ok(()) } SourceOperator::Join { left, right, .. } => { - use_indexes(left, referenced_tables, available_indexes)?; - use_indexes(right, referenced_tables, available_indexes)?; + use_indexes(left, referenced_tables, available_indexes, related_columns)?; + use_indexes(right, referenced_tables, available_indexes, related_columns)?; Ok(()) } SourceOperator::Nothing { .. } => Ok(()), @@ -658,7 +697,7 @@ fn eliminate_between( Ok(()) } -fn related_columns( +fn collect_related_columns( operator: &SourceOperator, result_columns: &[ResultSetColumn], where_clause: &Option>, @@ -751,7 +790,7 @@ fn source_related_columns( SourceOperator::Subquery { plan, predicates, .. } => { - optimizer::related_columns( + optimizer::collect_related_columns( &plan.source, &plan.result_columns, &plan.where_clause, @@ -1392,6 +1431,7 @@ pub fn try_extract_index_search_expression( index: available_indexes[index_index].clone(), cmp_op: operator, cmp_expr: *rhs, + cover_mapping: None, })); } _ => {} @@ -1411,6 +1451,7 @@ pub fn try_extract_index_search_expression( index: available_indexes[index_index].clone(), cmp_op: operator, cmp_expr: *lhs, + cover_mapping: None, })); } _ => {} diff --git a/core/translate/plan.rs b/core/translate/plan.rs index dcc976e90..bf90464dd 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -16,7 +16,7 @@ use crate::{ translate::plan::Plan::{Delete, Select}, }; use std::cmp::Ordering; -use std::collections::BTreeSet; +use std::collections::HashMap; #[derive(Debug, Clone)] pub struct ResultSetColumn { @@ -77,8 +77,6 @@ pub struct SelectPlan { pub source: SourceOperator, /// the columns inside SELECT ... FROM pub result_columns: Vec, - /// the columns related to this plan, - pub related_columns: BTreeSet, /// where clause split into a vec at 'AND' boundaries. pub where_clause: Option>, /// group by clause @@ -106,8 +104,6 @@ pub struct DeletePlan { pub source: SourceOperator, /// the columns inside SELECT ... FROM pub result_columns: Vec, - /// the columns related to this plan, - pub related_columns: BTreeSet, /// where clause split into a vec at 'AND' boundaries. pub where_clause: Option>, /// order by clause @@ -324,6 +320,7 @@ pub enum Search { index: Rc, cmp_op: ast::Operator, cmp_expr: ast::Expr, + cover_mapping: Option>, }, } diff --git a/core/translate/select.rs b/core/translate/select.rs index 66f2b3413..7deca5946 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -14,7 +14,6 @@ use crate::{schema::Schema, vdbe::Program, Result}; use crate::{Connection, SymbolTable}; use sqlite3_parser::ast; use sqlite3_parser::ast::ResultColumn; -use std::collections::BTreeSet; use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; @@ -52,7 +51,6 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result let mut plan = SelectPlan { source, result_columns: vec![], - related_columns: BTreeSet::new(), where_clause: None, group_by: None, order_by: None, diff --git a/core/util.rs b/core/util.rs index 4dee111df..7a7f1a548 100644 --- a/core/util.rs +++ b/core/util.rs @@ -43,7 +43,11 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc(3)?; match row.get::<&str>(4) { Ok(sql) => { - let index = schema::Index::from_sql(sql, root_page as usize)?; + let index = schema::Index::from_sql( + sql, + &schema.tables, + root_page as usize, + )?; schema.add_index(Rc::new(index)); } _ => continue, diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 4868e483b..9b74eb3c3 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -29,8 +29,8 @@ pub struct ProgramBuilder { // map of instruction index to manual comment (used in EXPLAIN) comments: HashMap, // index cursor that can cover table cursor - // Table Cursor -> (Index Cursor, Map) - pub index_cover_cursors: HashMap)>, + // Table Cursor -> Map + pub table_remapping_cursors: HashMap>, } impl ProgramBuilder { @@ -47,7 +47,7 @@ impl ProgramBuilder { deferred_label_resolutions: Vec::new(), seekrowid_emitted_bitmask: 0, comments: HashMap::new(), - index_cover_cursors: HashMap::new(), + table_remapping_cursors: Default::default(), } } diff --git a/testing/where.test b/testing/where.test index 1722d8d35..66d720134 100755 --- a/testing/where.test +++ b/testing/where.test @@ -320,7 +320,7 @@ do_execsql_test where-age-index-seek-regression-test-2 { do_execsql_test where-age-index-cover-seek { select id, age from users where age > 90 limit 1; -} {120|90} +} {54|91} do_execsql_test where-simple-between { SELECT * FROM products WHERE price BETWEEN 70 AND 100;