Skip to content

Commit 9947519

Browse files
Merge branch 'main' into array
2 parents d959cd9 + 20b9849 commit 9947519

19 files changed

Lines changed: 599 additions & 75 deletions

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use criterion::{criterion_group, criterion_main, Criterion};
19-
use sqlparser::dialect::GenericDialect;
19+
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect};
2020
use sqlparser::keywords::Keyword;
2121
use sqlparser::parser::Parser;
2222
use sqlparser::tokenizer::{Span, Word};
@@ -177,11 +177,82 @@ fn parse_compound_chain(c: &mut Criterion) {
177177
group.finish();
178178
}
179179

180+
/// Benchmark parsing pathological compound chains with a reserved keyword in
181+
/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to
182+
/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended
183+
/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at
184+
/// every segment.
185+
fn parse_compound_keyword_chain(c: &mut Criterion) {
186+
let mut group = c.benchmark_group("parse_compound_keyword_chain");
187+
let dialect = GenericDialect {};
188+
189+
for &n in &[5usize, 10, 15] {
190+
let body = std::iter::repeat_n(".not-b", n).collect::<String>();
191+
let sql = format!("SELECT x{body}");
192+
193+
group.bench_function(format!("chain_{n}"), |b| {
194+
b.iter(|| {
195+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
196+
});
197+
});
198+
}
199+
200+
group.finish();
201+
}
202+
203+
/// Benchmark parsing pathological `IF(<keyword-fn>(<keyword-fn>(...x` chains
204+
/// that previously caused 2^N work in `parse_prefix`. Each nested
205+
/// `current_time(` segment used to be explored twice at every level (once via
206+
/// the speculative reserved-word arm, once via the unreserved-word fallback),
207+
/// doubling work per level. Post-fix the cost is linear in chain length.
208+
fn parse_prefix_keyword_call_chain(c: &mut Criterion) {
209+
let mut group = c.benchmark_group("parse_prefix_keyword_call_chain");
210+
let dialect = PostgreSqlDialect {};
211+
212+
for &n in &[10usize, 20, 30] {
213+
let sql = String::from("if(") + &"current_time(".repeat(n) + "x";
214+
215+
group.bench_function(format!("chain_{n}"), |b| {
216+
b.iter(|| {
217+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
218+
});
219+
});
220+
}
221+
222+
group.finish();
223+
}
224+
225+
/// Benchmark parsing pathological `case-case-case-...c` chains that
226+
/// previously caused 2^N work in `parse_prefix`. Each `case` token used to
227+
/// trigger a speculative `parse_case_expr` that recursively descends the
228+
/// chain, but the unreserved-word fallback returns `Identifier(case)` so the
229+
/// overall `parse_prefix` succeeds and the failure cache never fires.
230+
/// Post-fix the per-arm cache short-circuits the speculative descent.
231+
fn parse_prefix_case_chain(c: &mut Criterion) {
232+
let mut group = c.benchmark_group("parse_prefix_case_chain");
233+
let dialect = SQLiteDialect {};
234+
235+
for &n in &[10usize, 20, 30] {
236+
let sql = "case\t-".repeat(n) + "c";
237+
238+
group.bench_function(format!("chain_{n}"), |b| {
239+
b.iter(|| {
240+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
241+
});
242+
});
243+
}
244+
245+
group.finish();
246+
}
247+
180248
criterion_group!(
181249
benches,
182250
basic_queries,
183251
word_to_ident,
184252
parse_many_identifiers,
185-
parse_compound_chain
253+
parse_compound_chain,
254+
parse_compound_keyword_chain,
255+
parse_prefix_keyword_call_chain,
256+
parse_prefix_case_chain
186257
);
187258
criterion_main!(benches);

src/ast/comments.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ impl Comments {
3333
/// last accepted comment. In other words, this method will skip the
3434
/// comment if it's coming out of order (as encountered in the parsed
3535
/// source code.)
36-
pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
36+
pub fn offer(&mut self, comment: CommentWithSpan) {
3737
if self
3838
.0
3939
.last()
@@ -71,7 +71,7 @@ impl Comments {
7171
/// // all comments appearing before line seven, i.e. before the first statement itself
7272
/// assert_eq!(
7373
/// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
74-
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
74+
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement"]);
7575
///
7676
/// // all comments appearing within the first statement
7777
/// assert_eq!(
@@ -81,7 +81,7 @@ impl Comments {
8181
/// // all comments appearing within or after the first statement
8282
/// assert_eq!(
8383
/// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
84-
/// &[" world ", " second statement\n", " trailing comment\n"]);
84+
/// &[" world ", " second statement", " trailing comment"]);
8585
/// ```
8686
///
8787
/// The [Spanned](crate::ast::Spanned) trait allows you to access location

src/ast/data_type.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,11 @@ pub enum DataType {
439439
Custom(ObjectName, Vec<String>),
440440
/// Arrays.
441441
Array(ArrayElemTypeDef),
442-
/// Map, see [ClickHouse].
442+
/// Map, see [ClickHouse], [Hive].
443443
///
444444
/// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map
445-
Map(Box<DataType>, Box<DataType>),
445+
/// [Hive]: https://hive.apache.org/docs/latest/language/languagemanual-types/
446+
Map(Box<DataType>, Box<DataType>, MapBracketKind),
446447
/// Tuple, see [ClickHouse].
447448
///
448449
/// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
@@ -787,9 +788,14 @@ impl fmt::Display for DataType {
787788
DataType::LowCardinality(data_type) => {
788789
write!(f, "LowCardinality({data_type})")
789790
}
790-
DataType::Map(key_data_type, value_data_type) => {
791-
write!(f, "Map({key_data_type}, {value_data_type})")
792-
}
791+
DataType::Map(key_data_type, value_data_type, bracket) => match bracket {
792+
MapBracketKind::Parentheses => {
793+
write!(f, "Map({key_data_type}, {value_data_type})")
794+
}
795+
MapBracketKind::AngleBrackets => {
796+
write!(f, "MAP<{key_data_type}, {value_data_type}>")
797+
}
798+
},
793799
DataType::Tuple(fields) => {
794800
write!(f, "Tuple({})", display_comma_separated(fields))
795801
}
@@ -906,6 +912,17 @@ pub enum StructBracketKind {
906912
AngleBrackets,
907913
}
908914

915+
/// Type of brackets used for `MAP` types.
///
/// Stored as the third field of `DataType::Map`; the `Display` impl of
/// `DataType` matches on it to choose between the two spellings below.
916+
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
917+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
918+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
919+
pub enum MapBracketKind {
920+
/// Parenthesized form, displayed as `Map(<key>, <value>)`.
///
/// Example: `Map(String, UInt16)`
921+
Parentheses,
922+
/// Angle-bracket form, displayed as `MAP<key, value>`.
///
/// Example: `MAP<STRING, INT>`
923+
AngleBrackets,
924+
}
925+
909926
/// Timestamp and Time data types information about TimeZone formatting.
910927
///
911928
/// This is more related to a display information than real differences between each variant. To

src/ast/mod.rs

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use crate::{
5353

5454
pub use self::data_type::{
5555
ArrayElemTypeDef, BinaryLength, CharLengthUnits, CharacterLength, DataType, EnumMember,
56-
ExactNumberInfo, IntervalFields, StructBracketKind, TimezoneInfo,
56+
ExactNumberInfo, IntervalFields, MapBracketKind, StructBracketKind, TimezoneInfo,
5757
};
5858
pub use self::dcl::{
5959
AlterRoleOperation, CreateRole, Grant, ResetConfig, Revoke, RoleOption, SecondaryRoles,
@@ -4479,6 +4479,28 @@ pub enum Statement {
44794479
comment: Option<String>,
44804480
},
44814481
/// ```sql
4482+
/// CREATE [ OR REPLACE ] [ { TEMP | TEMPORARY | VOLATILE } ] FILE FORMAT [ IF NOT EXISTS ] <name>
4483+
/// [ TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] ]
4484+
/// [ COMMENT = '<string_literal>' ]
4485+
/// ```
4486+
/// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format>
4487+
CreateFileFormat {
4488+
/// `OR REPLACE` flag.
4489+
or_replace: bool,
4490+
/// Whether file format is temporary.
4491+
temporary: bool,
4492+
/// Whether file format is volatile.
4493+
volatile: bool,
4494+
/// `IF NOT EXISTS` flag.
4495+
if_not_exists: bool,
4496+
/// File format name.
4497+
name: ObjectName,
4498+
/// Format type options (e.g. `TYPE`, `FIELD_DELIMITER`, `COMPRESSION`, ...).
4499+
options: KeyValueOptions,
4500+
/// Optional comment.
4501+
comment: Option<String>,
4502+
},
4503+
/// ```sql
44824504
/// ASSERT <condition> [AS <message>]
44834505
/// ```
44844506
Assert {
@@ -6177,6 +6199,31 @@ impl fmt::Display for Statement {
61776199
}
61786200
Ok(())
61796201
}
6202+
Statement::CreateFileFormat {
6203+
or_replace,
6204+
temporary,
6205+
volatile,
6206+
if_not_exists,
6207+
name,
6208+
options,
6209+
comment,
6210+
} => {
6211+
write!(
6212+
f,
6213+
"CREATE {or_replace}{temp}{volatile}FILE FORMAT {if_not_exists}{name}",
6214+
or_replace = if *or_replace { "OR REPLACE " } else { "" },
6215+
temp = if *temporary { "TEMPORARY " } else { "" },
6216+
volatile = if *volatile { "VOLATILE " } else { "" },
6217+
if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" },
6218+
)?;
6219+
if !options.options.is_empty() {
6220+
write!(f, " {options}")?;
6221+
}
6222+
if let Some(comment) = comment {
6223+
write!(f, " COMMENT='{}'", comment)?;
6224+
}
6225+
Ok(())
6226+
}
61806227
Statement::CopyIntoSnowflake {
61816228
kind,
61826229
into,
@@ -12030,7 +12077,8 @@ impl fmt::Display for OptimizerHint {
1203012077
f.write_str(prefix)?;
1203112078
f.write_str(&self.prefix)?;
1203212079
f.write_str("+")?;
12033-
f.write_str(&self.text)
12080+
f.write_str(&self.text)?;
12081+
f.write_str("\n")
1203412082
}
1203512083
OptimizerHintStyle::MultiLine => {
1203612084
f.write_str("/*")?;

src/ast/query.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3715,8 +3715,11 @@ pub struct SelectInto {
37153715
pub unlogged: bool,
37163716
/// `TABLE` keyword present.
37173717
pub table: bool,
3718-
/// Name of the target table.
3719-
pub name: ObjectName,
3718+
/// Target(s) of the `INTO` clause, see [Postgres], [MySQL].
3719+
///
3720+
/// [Postgres]: https://www.postgresql.org/docs/current/sql-selectinto.html
3721+
/// [MySQL]: https://dev.mysql.com/doc/refman/9.7/en/select-into.html
3722+
pub targets: Vec<Expr>,
37203723
}
37213724

37223725
impl fmt::Display for SelectInto {
@@ -3725,7 +3728,14 @@ impl fmt::Display for SelectInto {
37253728
let unlogged = if self.unlogged { " UNLOGGED" } else { "" };
37263729
let table = if self.table { " TABLE" } else { "" };
37273730

3728-
write!(f, "INTO{}{}{} {}", temporary, unlogged, table, self.name)
3731+
write!(
3732+
f,
3733+
"INTO{}{}{} {}",
3734+
temporary,
3735+
unlogged,
3736+
table,
3737+
display_comma_separated(&self.targets)
3738+
)
37293739
}
37303740
}
37313741

src/ast/spans.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ impl Spanned for Values {
297297
/// - [Statement::CreateProcedure]
298298
/// - [Statement::CreateMacro]
299299
/// - [Statement::CreateStage]
300+
/// - [Statement::CreateFileFormat]
300301
/// - [Statement::Assert]
301302
/// - [Statement::Grant]
302303
/// - [Statement::Revoke]
@@ -457,6 +458,7 @@ impl Spanned for Statement {
457458
Statement::CreateProcedure { .. } => Span::empty(),
458459
Statement::CreateMacro { .. } => Span::empty(),
459460
Statement::CreateStage { .. } => Span::empty(),
461+
Statement::CreateFileFormat { .. } => Span::empty(),
460462
Statement::Assert { .. } => Span::empty(),
461463
Statement::Grant { .. } => Span::empty(),
462464
Statement::Deny { .. } => Span::empty(),
@@ -2389,10 +2391,10 @@ impl Spanned for SelectInto {
23892391
temporary: _, // bool
23902392
unlogged: _, // bool
23912393
table: _, // bool
2392-
name,
2394+
targets,
23932395
} = self;
23942396

2395-
name.span()
2397+
union_spans(targets.iter().map(|t| t.span()))
23962398
}
23972399
}
23982400

src/ast/value.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ impl fmt::Display for Value {
273273
Value::DollarQuotedString(v) => write!(f, "{v}"),
274274
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
275275
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
276-
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
276+
Value::NationalStringLiteral(v) => write!(f, "N'{}'", escape_single_quote_string(v)),
277277
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
278278
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
279279
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),

src/dialect/hive.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,9 @@ impl Dialect for HiveDialect {
7979
fn supports_from_first_insert(&self) -> bool {
8080
true
8181
}
82+
83+
/// Returns `true`: Hive opts in to the angle-bracket `MAP<...>` syntax.
///
/// See <https://hive.apache.org/docs/latest/language/languagemanual-types/>
84+
fn supports_map_literal_with_angle_brackets(&self) -> bool {
85+
true
86+
}
8287
}

src/dialect/snowflake.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ impl Dialect for SnowflakeDialect {
326326
);
327327
} else if parser.parse_keyword(Keyword::DATABASE) {
328328
return Some(parse_create_database(or_replace, transient, parser));
329+
} else if parser.parse_keywords(&[Keyword::FILE, Keyword::FORMAT]) {
330+
return Some(parse_create_file_format(
331+
or_replace, temporary, volatile, parser,
332+
));
329333
} else {
330334
// need to go back with the cursor
331335
let mut back = 1;
@@ -1272,6 +1276,35 @@ pub fn parse_create_stage(
12721276
})
12731277
}
12741278

1279+
/// Parse a Snowflake `CREATE FILE FORMAT` statement.
1280+
/// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format>
1281+
pub fn parse_create_file_format(
1282+
or_replace: bool,
1283+
temporary: bool,
1284+
volatile: bool,
1285+
parser: &mut Parser,
1286+
) -> Result<Statement, ParserError> {
1287+
let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
1288+
let name = parser.parse_object_name(true)?;
1289+
let options = parser.parse_key_value_options(false, &[Keyword::COMMENT])?;
1290+
let comment = if parser.parse_keyword(Keyword::COMMENT) {
1291+
parser.expect_token(&Token::Eq)?;
1292+
Some(parser.parse_comment_value()?)
1293+
} else {
1294+
None
1295+
};
1296+
1297+
Ok(Statement::CreateFileFormat {
1298+
or_replace,
1299+
temporary,
1300+
volatile,
1301+
if_not_exists,
1302+
name,
1303+
options,
1304+
comment,
1305+
})
1306+
}
1307+
12751308
pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
12761309
let mut ident = String::new();
12771310
while let Some(next_token) = parser.next_token_no_skip() {

src/dialect/spark.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ impl Dialect for SparkSqlDialect {
116116
true
117117
}
118118

119+
/// Returns `true`: Spark SQL opts in to the pipe operator.
///
/// See:
120+
/// - <https://spark.apache.org/docs/latest/sql-pipe-syntax.html>
121+
/// - <https://issues.apache.org/jira/browse/SPARK-49528>
122+
fn supports_pipe_operator(&self) -> bool {
123+
true
124+
}
125+
119126
/// Parse the `DIV` keyword as integer division.
120127
///
121128
/// Example: `SELECT 10 DIV 3` returns `3`.

0 commit comments

Comments
 (0)