Skip to content

Commit

Permalink
Add supports for Hive's SELECT ... GROUP BY .. GROUPING SETS syntax (
Browse files Browse the repository at this point in the history
…#1653)

Co-authored-by: Ifeanyi Ubah <[email protected]>
  • Loading branch information
wugeer and iffyio authored Feb 14, 2025
1 parent a5bbb5e commit 1c0e5d3
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 60 deletions.
10 changes: 9 additions & 1 deletion src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2547,13 +2547,18 @@ impl fmt::Display for SelectInto {
/// e.g. GROUP BY year WITH ROLLUP WITH TOTALS
///
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum GroupByWithModifier {
Rollup,
Cube,
Totals,
/// Hive supports GROUP BY GROUPING SETS syntax.
/// e.g. GROUP BY year , month GROUPING SETS((year,month),(year),(month))
///
/// [Hive]: <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-GROUPINGSETSclause>
GroupingSets(Expr),
}

impl fmt::Display for GroupByWithModifier {
Expand All @@ -2562,6 +2567,9 @@ impl fmt::Display for GroupByWithModifier {
GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"),
GroupByWithModifier::Cube => write!(f, "WITH CUBE"),
GroupByWithModifier::Totals => write!(f, "WITH TOTALS"),
GroupByWithModifier::GroupingSets(expr) => {
write!(f, "{expr}")
}
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,14 @@ impl Dialect for ClickHouseDialect {
fn supports_from_first_select(&self) -> bool {
true
}

// See <https://clickhouse.com/docs/en/sql-reference/aggregate-functions/grouping_function#grouping-sets>
fn supports_group_by_expr(&self) -> bool {
true
}

/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
fn supports_group_by_with_modifier(&self) -> bool {
true
}
}
4 changes: 4 additions & 0 deletions src/dialect/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ impl Dialect for GenericDialect {
true
}

fn supports_group_by_with_modifier(&self) -> bool {
true
}

fn supports_connect_by(&self) -> bool {
true
}
Expand Down
11 changes: 8 additions & 3 deletions src/dialect/hive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,23 @@ impl Dialect for HiveDialect {
true
}

/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
fn supports_bang_not_operator(&self) -> bool {
true
}

/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
fn supports_load_data(&self) -> bool {
true
}

/// See Hive <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
/// See <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
fn supports_table_sample_before_alias(&self) -> bool {
true
}

/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-CubesandRollupsr>
fn supports_group_by_with_modifier(&self) -> bool {
true
}
}
6 changes: 6 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,12 @@ pub trait Dialect: Debug + Any {
false
}

/// Returns true if the dialects supports `GROUP BY` modifiers prefixed by a `WITH` keyword.
/// Example: `GROUP BY value WITH ROLLUP`.
fn supports_group_by_with_modifier(&self) -> bool {
false
}

/// Returns true if the dialect supports CONNECT BY.
fn supports_connect_by(&self) -> bool {
false
Expand Down
10 changes: 9 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9148,7 +9148,7 @@ impl<'a> Parser<'a> {
};

let mut modifiers = vec![];
if dialect_of!(self is ClickHouseDialect | GenericDialect) {
if self.dialect.supports_group_by_with_modifier() {
loop {
if !self.parse_keyword(Keyword::WITH) {
break;
Expand All @@ -9171,6 +9171,14 @@ impl<'a> Parser<'a> {
});
}
}
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
self.expect_token(&Token::LParen)?;
let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?;
self.expect_token(&Token::RParen)?;
modifiers.push(GroupByWithModifier::GroupingSets(Expr::GroupingSets(
result,
)));
};
let group_by = match expressions {
None => GroupByExpr::All(modifiers),
Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),
Expand Down
55 changes: 0 additions & 55 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1069,61 +1069,6 @@ fn parse_create_materialized_view() {
clickhouse_and_generic().verified_stmt(sql);
}

#[test]
fn parse_group_by_with_modifier() {
let clauses = ["x", "a, b", "ALL"];
let modifiers = [
"WITH ROLLUP",
"WITH CUBE",
"WITH TOTALS",
"WITH ROLLUP WITH CUBE",
];
let expected_modifiers = [
vec![GroupByWithModifier::Rollup],
vec![GroupByWithModifier::Cube],
vec![GroupByWithModifier::Totals],
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
];
for clause in &clauses {
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
match clickhouse_and_generic().verified_stmt(&sql) {
Statement::Query(query) => {
let group_by = &query.body.as_select().unwrap().group_by;
if clause == &"ALL" {
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
} else {
assert_eq!(
group_by,
&GroupByExpr::Expressions(
clause
.split(", ")
.map(|c| Identifier(Ident::new(c)))
.collect(),
expected_modifier.to_vec()
)
);
}
}
_ => unreachable!(),
}
}
}

// invalid cases
let invalid_cases = [
"SELECT * FROM t GROUP BY x WITH",
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
"SELECT * FROM t GROUP BY WITH ROLLUP",
];
for sql in invalid_cases {
clickhouse_and_generic()
.parse_sql_statements(sql)
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
}
}

#[test]
fn parse_select_order_by_with_fill_interpolate() {
let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \
Expand Down
86 changes: 86 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2447,6 +2447,92 @@ fn parse_select_group_by_all() {
);
}

#[test]
fn parse_group_by_with_modifier() {
let clauses = ["x", "a, b", "ALL"];
let modifiers = [
"WITH ROLLUP",
"WITH CUBE",
"WITH TOTALS",
"WITH ROLLUP WITH CUBE",
];
let expected_modifiers = [
vec![GroupByWithModifier::Rollup],
vec![GroupByWithModifier::Cube],
vec![GroupByWithModifier::Totals],
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
];
let dialects = all_dialects_where(|d| d.supports_group_by_with_modifier());

for clause in &clauses {
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
match dialects.verified_stmt(&sql) {
Statement::Query(query) => {
let group_by = &query.body.as_select().unwrap().group_by;
if clause == &"ALL" {
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
} else {
assert_eq!(
group_by,
&GroupByExpr::Expressions(
clause
.split(", ")
.map(|c| Identifier(Ident::new(c)))
.collect(),
expected_modifier.to_vec()
)
);
}
}
_ => unreachable!(),
}
}
}

// invalid cases
let invalid_cases = [
"SELECT * FROM t GROUP BY x WITH",
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
"SELECT * FROM t GROUP BY WITH ROLLUP",
];
for sql in invalid_cases {
dialects
.parse_sql_statements(sql)
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
}
}

#[test]
fn parse_group_by_special_grouping_sets() {
let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())";
match all_dialects().verified_stmt(sql) {
Statement::Query(query) => {
let group_by = &query.body.as_select().unwrap().group_by;
assert_eq!(
group_by,
&GroupByExpr::Expressions(
vec![
Expr::Identifier(Ident::new("a")),
Expr::Identifier(Ident::new("b"))
],
vec![GroupByWithModifier::GroupingSets(Expr::GroupingSets(vec![
vec![
Expr::Identifier(Ident::new("a")),
Expr::Identifier(Ident::new("b"))
],
vec![Expr::Identifier(Ident::new("a")),],
vec![Expr::Identifier(Ident::new("b"))],
vec![]
]))]
)
);
}
_ => unreachable!(),
}
}

#[test]
fn parse_select_having() {
let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1";
Expand Down

0 comments on commit 1c0e5d3

Please sign in to comment.