|
16 | 16 | // under the License. |
17 | 17 |
|
18 | 18 | use criterion::{criterion_group, criterion_main, Criterion}; |
19 | | -use sqlparser::dialect::GenericDialect; |
| 19 | +use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect}; |
20 | 20 | use sqlparser::keywords::Keyword; |
21 | 21 | use sqlparser::parser::Parser; |
22 | 22 | use sqlparser::tokenizer::{Span, Word}; |
@@ -177,11 +177,82 @@ fn parse_compound_chain(c: &mut Criterion) { |
177 | 177 | group.finish(); |
178 | 178 | } |
179 | 179 |
|
| 180 | +/// Benchmark parsing pathological compound chains with a reserved keyword in |
| 181 | +/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to |
| 182 | +/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended |
| 183 | +/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at |
| 184 | +/// every segment. |
| 185 | +fn parse_compound_keyword_chain(c: &mut Criterion) { |
| 186 | + let mut group = c.benchmark_group("parse_compound_keyword_chain"); |
| 187 | + let dialect = GenericDialect {}; |
| 188 | + |
| 189 | + for &n in &[5usize, 10, 15] { |
| 190 | + // Build `SELECT x.not-b.not-b...` with `n` keyword segments. |
| 191 | + let sql = format!("SELECT x{}", ".not-b".repeat(n)); |
| 192 | + |
| 193 | + group.bench_function(format!("chain_{n}"), |b| { |
| 194 | + b.iter(|| { |
| 195 | + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); |
| 196 | + }); |
| 197 | + }); |
| 198 | + } |
| 199 | + |
| 200 | + group.finish(); |
| 201 | +} |
| 202 | + |
| 203 | +/// Benchmark for deeply nested keyword-function calls of the form |
| 204 | +/// `if(current_time(current_time(...x`, parsed with the PostgreSQL dialect. |
| 205 | +/// `parse_prefix` used to explore every nested `current_time(` segment twice |
| 206 | +/// (speculative reserved-word arm, then unreserved-word fallback), giving 2^N |
| 207 | +/// work for N levels. Post-fix the cost is linear in chain length. |
| 208 | +fn parse_prefix_keyword_call_chain(c: &mut Criterion) { |
| 209 | + let dialect = PostgreSqlDialect {}; |
| 210 | + let mut group = c.benchmark_group("parse_prefix_keyword_call_chain"); |
| 211 | + |
| 212 | + for depth in [10usize, 20, 30] { |
| 213 | + let sql = format!("if({}x", "current_time(".repeat(depth)); |
| 214 | + |
| 215 | + group.bench_function(format!("chain_{depth}"), |bencher| { |
| 216 | + bencher.iter(|| { |
| 217 | + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); |
| 218 | + }); |
| 219 | + }); |
| 220 | + } |
| 221 | + |
| 222 | + group.finish(); |
| 223 | +} |
| 224 | + |
| 225 | +/// Benchmark for `case-case-case-...c` chains parsed with the SQLite dialect, |
| 226 | +/// a shape that previously caused 2^N work in `parse_prefix`. Every `case` |
| 227 | +/// token used to start a speculative `parse_case_expr` that recursively |
| 228 | +/// descends the chain; because the unreserved-word fallback then returns |
| 229 | +/// `Identifier(case)`, `parse_prefix` succeeds overall and the failure cache |
| 230 | +/// never fires. Post-fix the per-arm cache short-circuits that descent. |
| 231 | +fn parse_prefix_case_chain(c: &mut Criterion) { |
| 232 | + let dialect = SQLiteDialect {}; |
| 233 | + let mut group = c.benchmark_group("parse_prefix_case_chain"); |
| 234 | + |
| 235 | + for links in [10usize, 20, 30] { |
| 236 | + let sql = format!("{}c", "case\t-".repeat(links)); |
| 237 | + |
| 238 | + group.bench_function(format!("chain_{links}"), |bencher| { |
| 239 | + bencher.iter(|| { |
| 240 | + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); |
| 241 | + }); |
| 242 | + }); |
| 243 | + } |
| 244 | + |
| 245 | + group.finish(); |
| 246 | +} |
| 247 | + |
180 | 248 | criterion_group!( |
181 | 249 | benches, |
182 | 250 | basic_queries, |
183 | 251 | word_to_ident, |
184 | 252 | parse_many_identifiers, |
185 | | - parse_compound_chain |
| 253 | + parse_compound_chain, |
| 254 | + parse_compound_keyword_chain, |
| 255 | + parse_prefix_keyword_call_chain, |
| 256 | + parse_prefix_case_chain |
186 | 257 | ); |
187 | 258 | criterion_main!(benches); |
0 commit comments