Skip to content

Commit

Permalink
Implement new group size hint syntax
Browse files Browse the repository at this point in the history
This commit implements the new GROUP SIZE query hint syntax proposed in
design doc MaterializeInc#21500. The syntax comprises three new query hints:
AGGREGATE INPUT GROUP SIZE, DISTINCT ON INPUT GROUP SIZE, and LIMIT
INPUT GROUP SIZE. These hints apply to the syntax constructs they refer
to, as opposed to EXPECTED GROUP SIZE, which applies to all constructs
in a query block. The new syntax cannot be mixed with the old EXPECTED
GROUP SIZE query hint, so an error is raised in case an invalid
combination is detected.
  • Loading branch information
vmarcos committed Sep 5, 2023
1 parent ae4f099 commit dfa89b3
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 32 deletions.
2 changes: 2 additions & 0 deletions src/sql-lexer/src/keywords.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Access
Acks
Add
Addresses
Aggregate
All
Alter
And
Expand Down Expand Up @@ -177,6 +178,7 @@ Info
Inherit
Inline
Inner
Input
Insert
Inspect
Int
Expand Down
6 changes: 6 additions & 0 deletions src/sql-parser/src/ast/defs/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,18 @@ impl_display!(SetOperator);
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum SelectOptionName {
ExpectedGroupSize,
AggregateInputGroupSize,
DistinctOnInputGroupSize,
LimitInputGroupSize,
}

impl AstDisplay for SelectOptionName {
fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
f.write_str(match self {
SelectOptionName::ExpectedGroupSize => "EXPECTED GROUP SIZE",
SelectOptionName::AggregateInputGroupSize => "AGGREGATE INPUT GROUP SIZE",
SelectOptionName::DistinctOnInputGroupSize => "DISTINCT ON INPUT GROUP SIZE",
SelectOptionName::LimitInputGroupSize => "LIMIT INPUT GROUP SIZE",
})
}
}
Expand Down
21 changes: 19 additions & 2 deletions src/sql-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5883,8 +5883,25 @@ impl<'a> Parser<'a> {
}

fn parse_select_option(&mut self) -> Result<SelectOption<Raw>, ParserError> {
self.expect_keywords(&[EXPECTED, GROUP, SIZE])?;
let name = SelectOptionName::ExpectedGroupSize;
let name = match self.expect_one_of_keywords(&[EXPECTED, AGGREGATE, DISTINCT, LIMIT])? {
EXPECTED => {
self.expect_keywords(&[GROUP, SIZE])?;
SelectOptionName::ExpectedGroupSize
}
AGGREGATE => {
self.expect_keywords(&[INPUT, GROUP, SIZE])?;
SelectOptionName::AggregateInputGroupSize
}
DISTINCT => {
self.expect_keywords(&[ON, INPUT, GROUP, SIZE])?;
SelectOptionName::DistinctOnInputGroupSize
}
LIMIT => {
self.expect_keywords(&[INPUT, GROUP, SIZE])?;
SelectOptionName::LimitInputGroupSize
}
_ => unreachable!(),
};
Ok(SelectOption {
name,
value: self.parse_optional_option_value()?,
Expand Down
43 changes: 39 additions & 4 deletions src/sql-parser/tests/testdata/select
Original file line number Diff line number Diff line change
Expand Up @@ -1272,28 +1272,28 @@ Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select {
parse-statement
SELECT * FROM foo OPTIONS (bar = 7)
----
error: Expected EXPECTED, found identifier "bar"
error: Expected one of EXPECTED or AGGREGATE or DISTINCT or LIMIT, found identifier "bar"
SELECT * FROM foo OPTIONS (bar = 7)
^

parse-statement
SELECT a, b, min(c) FROM foo GROUP BY a, b OPTIONS (bar = 7)
----
error: Expected EXPECTED, found identifier "bar"
error: Expected one of EXPECTED or AGGREGATE or DISTINCT or LIMIT, found identifier "bar"
SELECT a, b, min(c) FROM foo GROUP BY a, b OPTIONS (bar = 7)
^

parse-statement
SELECT a, b, min(c) FROM foo GROUP BY a, b OPTIONS (bar = 'baz')
----
error: Expected EXPECTED, found identifier "bar"
error: Expected one of EXPECTED or AGGREGATE or DISTINCT or LIMIT, found identifier "bar"
SELECT a, b, min(c) FROM foo GROUP BY a, b OPTIONS (bar = 'baz')
^

parse-statement
SELECT a, b, min(c) FROM foo GROUP BY a, b OPTIONS (bar)
----
error: Expected EXPECTED, found identifier "bar"
error: Expected one of EXPECTED or AGGREGATE or DISTINCT or LIMIT, found identifier "bar"
SELECT a, b, min(c) FROM foo GROUP BY a, b OPTIONS (bar)
^

Expand Down Expand Up @@ -1505,6 +1505,41 @@ SELECT 1 OPTIONS (EXPECTED GROUP SIZE = 1)
----
SELECT 1 OPTIONS (EXPECTED GROUP SIZE = 1)

parse-statement roundtrip
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1)
----
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1)

parse-statement roundtrip
SELECT 1 OPTIONS (DISTINCT ON INPUT GROUP SIZE = 1)
----
SELECT 1 OPTIONS (DISTINCT ON INPUT GROUP SIZE = 1)

parse-statement roundtrip
SELECT 1 OPTIONS (LIMIT INPUT GROUP SIZE = 1)
----
SELECT 1 OPTIONS (LIMIT INPUT GROUP SIZE = 1)

parse-statement roundtrip
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1, DISTINCT ON INPUT GROUP SIZE = 2)
----
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1, DISTINCT ON INPUT GROUP SIZE = 2)

parse-statement roundtrip
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1, LIMIT INPUT GROUP SIZE = 3)
----
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1, LIMIT INPUT GROUP SIZE = 3)

parse-statement roundtrip
SELECT 1 OPTIONS (DISTINCT ON INPUT GROUP SIZE = 2, LIMIT INPUT GROUP SIZE = 3)
----
SELECT 1 OPTIONS (DISTINCT ON INPUT GROUP SIZE = 2, LIMIT INPUT GROUP SIZE = 3)

parse-statement roundtrip
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1, DISTINCT ON INPUT GROUP SIZE = 2, LIMIT INPUT GROUP SIZE = 3)
----
SELECT 1 OPTIONS (AGGREGATE INPUT GROUP SIZE = 1, DISTINCT ON INPUT GROUP SIZE = 2, LIMIT INPUT GROUP SIZE = 3)

parse-statement roundtrip
SELECT 1 AS "FOO"
----
Expand Down
4 changes: 4 additions & 0 deletions src/sql/src/plan/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ pub enum PlanError {
max: Duration,
requested: Duration,
},
InvalidGroupSizeHints,
// TODO(benesch): eventually all errors should be structured.
Unstructured(String),
}
Expand Down Expand Up @@ -592,6 +593,9 @@ impl fmt::Display for PlanError {
Self::InvalidTimestampInterval { min, max, requested } => {
write!(f, "invalid timestamp interval of {}ms, must be in the range [{}ms, {}ms]", requested.as_millis(), min.as_millis(), max.as_millis())
}
Self::InvalidGroupSizeHints => f.write_str("EXPECTED GROUP SIZE cannot be provided \
simultaneously with any of AGGREGATE INPUT GROUP SIZE, DISTINCT ON INPUT GROUP SIZE, \
or LIMIT INPUT GROUP SIZE"),
}
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/sql/src/plan/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ use crate::plan::query::ExprContext;
use crate::plan::typeconv::{self, CastContext};
use crate::plan::Params;

use super::plan_utils::GroupSizeHints;

#[allow(missing_debug_implementations)]
pub struct Hir;

Expand Down Expand Up @@ -1678,7 +1680,7 @@ impl HirRelationExpr {
pub fn finish_maintained(
&mut self,
finishing: &mut RowSetFinishing,
expected_group_size: Option<u64>,
group_size_hints: GroupSizeHints,
) {
if !finishing.is_trivial(self.arity()) {
let old_finishing =
Expand All @@ -1695,7 +1697,7 @@ impl HirRelationExpr {
order_key: old_finishing.order_by,
limit: old_finishing.limit,
offset: old_finishing.offset,
expected_group_size,
expected_group_size: group_size_hints.limit_input_group_size,
}
.project(old_finishing.project)
}
Expand Down
54 changes: 54 additions & 0 deletions src/sql/src/plan/plan_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use mz_repr::RelationDesc;

use crate::ast::Ident;
use crate::normalize;
use crate::plan::query::SelectOptionExtracted;
use crate::plan::PlanError;

/// Renames the columns in `desc` with the names in `column_names` if
Expand Down Expand Up @@ -68,3 +69,56 @@ impl fmt::Display for JoinSide {
}
}
}

/// Specifies a bundle of group size query hints.
///
/// This struct bridges from old to new syntax for group size query hints,
/// making it easier to pass these hints along and make use of a group size
/// hint configuration.
#[derive(Debug, Default, Clone, Copy)]
pub struct GroupSizeHints {
pub aggregate_input_group_size: Option<u64>,
pub distinct_on_input_group_size: Option<u64>,
pub limit_input_group_size: Option<u64>,
}

impl TryFrom<SelectOptionExtracted> for GroupSizeHints {
type Error = PlanError;

/// Creates group size hints from extracted `SELECT` `OPTIONS` validating that
/// either the old `EXPECTED GROUP SIZE` syntax was used or alternatively the
/// new syntax with `AGGREGATE INPUT GROUP SIZE`, `DISTINCT ON INPUT GROUP SIZE`,
/// and `LIMIT INPUT GROUP SIZE`. If the two syntax versions are mixed in the
/// same `OPTIONS` clause, an error is returned.[^1]
/// [^1] <https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/20230829_topk_size_hint.md>
fn try_from(select_option_extracted: SelectOptionExtracted) -> Result<Self, Self::Error> {
let SelectOptionExtracted {
expected_group_size,
aggregate_input_group_size,
distinct_on_input_group_size,
limit_input_group_size,
..
} = select_option_extracted;
if expected_group_size.is_some()
&& (aggregate_input_group_size.is_some()
|| distinct_on_input_group_size.is_some()
|| limit_input_group_size.is_some())
{
Err(PlanError::InvalidGroupSizeHints)
} else {
if expected_group_size.is_some() {
Ok(GroupSizeHints {
aggregate_input_group_size: expected_group_size,
distinct_on_input_group_size: expected_group_size,
limit_input_group_size: expected_group_size,
})
} else {
Ok(GroupSizeHints {
aggregate_input_group_size,
distinct_on_input_group_size,
limit_input_group_size,
})
}
}
}
}
Loading

0 comments on commit dfa89b3

Please sign in to comment.