Skip to content

Commit c96c0c6

Browse files
committed
fix(graph): allow more complex dataset and table names
1 parent b3044af commit c96c0c6

File tree

5 files changed

+154
-87
lines changed

5 files changed

+154
-87
lines changed

graph/src/amp/manifest/data_source/raw.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ impl RawSource {
144144
end_block,
145145
} = self;
146146

147-
validate_ident(&dataset).map_err(|e| e.source_context("invalid `dataset`"))?;
147+
if dataset.is_empty() {
148+
return Err(Error::InvalidValue(anyhow!("`dataset` cannot be empty")));
149+
}
148150
Self::validate_tables(&tables)?;
149151

150152
let address = address.unwrap_or(Address::ZERO);
@@ -180,8 +182,11 @@ impl RawSource {
180182
}
181183

182184
for (i, table) in tables.iter().enumerate() {
183-
validate_ident(table)
184-
.map_err(|e| e.source_context(format!("invalid `table` at index {i}")))?;
185+
if table.is_empty() {
186+
return Err(Error::InvalidValue(anyhow!(
187+
"`table` at index {i} cannot be empty"
188+
)));
189+
}
185190
}
186191

187192
Ok(())

graph/src/amp/sql/query_builder/block_range_query.rs

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use ahash::RandomState;
88
use alloy::primitives::BlockNumber;
99
use sqlparser_latest::ast::{self, VisitMut, VisitorMut};
1010

11-
use super::{extract_tables, normalize_table, parse_query};
11+
use super::{extract_tables, parse_query, TableReference};
1212

1313
/// Limits the query execution to the specified block range.
1414
///
@@ -56,7 +56,7 @@ pub(super) fn new_block_range_query<'a>(
5656
fn new_tables_to_ctes_mapping(
5757
query: &ast::Query,
5858
hasher: &mut impl Hasher,
59-
) -> BTreeMap<String, String> {
59+
) -> BTreeMap<TableReference, String> {
6060
extract_tables(query)
6161
.into_iter()
6262
.map(|table| {
@@ -69,12 +69,12 @@ fn new_tables_to_ctes_mapping(
6969

7070
/// Visits the SQL query AST and replaces referenced table names with CTE names.
7171
struct TableReplacer {
72-
tables_to_ctes_mapping: BTreeMap<String, String>,
72+
tables_to_ctes_mapping: BTreeMap<TableReference, String>,
7373
}
7474

7575
impl TableReplacer {
7676
/// Creates a new table replacer.
77-
fn new(tables_to_ctes_mapping: BTreeMap<String, String>) -> Self {
77+
fn new(tables_to_ctes_mapping: BTreeMap<TableReference, String>) -> Self {
7878
Self {
7979
tables_to_ctes_mapping,
8080
}
@@ -86,7 +86,10 @@ impl TableReplacer {
8686
return;
8787
};
8888

89-
let Some(cte_table) = self.tables_to_ctes_mapping.get(&normalize_table(name)) else {
89+
let Some(cte_table) = self
90+
.tables_to_ctes_mapping
91+
.get(&TableReference::with_object_name(name))
92+
else {
9093
return;
9194
};
9295

@@ -133,20 +136,20 @@ mod tests {
133136
assert_eq!(
134137
block_range_query,
135138
parse_query(
136-
"
137-
WITH block_range_14621009630487609643 AS (
138-
SELECT * FROM d WHERE _block_num BETWEEN 0 AND 1000000
139+
r#"
140+
WITH block_range_1164572571450379730 AS (
141+
SELECT * FROM "d" WHERE _block_num BETWEEN 0 AND 1000000
139142
),
140-
source_14621009630487609643 AS (
141-
SELECT a, b, c FROM block_range_14621009630487609643 AS d
143+
source_1164572571450379730 AS (
144+
SELECT a, b, c FROM block_range_1164572571450379730 AS d
142145
)
143146
SELECT
144-
source_14621009630487609643.*
147+
source_1164572571450379730.*
145148
FROM
146-
source_14621009630487609643
149+
source_1164572571450379730
147150
ORDER BY
148-
source_14621009630487609643.b
149-
"
151+
source_1164572571450379730.b
152+
"#
150153
)
151154
.unwrap(),
152155
)
@@ -162,23 +165,23 @@ mod tests {
162165
assert_eq!(
163166
block_range_query,
164167
parse_query(
165-
"
166-
WITH block_range_14621009630487609643 AS (
167-
SELECT * FROM d WHERE _block_num BETWEEN 0 AND 1000000
168+
r#"
169+
WITH block_range_1164572571450379730 AS (
170+
SELECT * FROM "d" WHERE _block_num BETWEEN 0 AND 1000000
168171
),
169-
block_range_12377422807768256314 AS (
170-
SELECT * FROM e WHERE _block_num BETWEEN 0 AND 1000000
172+
block_range_13063992259633584610 AS (
173+
SELECT * FROM "e" WHERE _block_num BETWEEN 0 AND 1000000
171174
),
172-
source_12377422807768256314 AS (
173-
SELECT a, b, c FROM block_range_14621009630487609643 AS d JOIN block_range_12377422807768256314 AS e ON e.e = d.d
175+
source_13063992259633584610 AS (
176+
SELECT a, b, c FROM block_range_1164572571450379730 AS d JOIN block_range_13063992259633584610 AS e ON e.e = d.d
174177
)
175178
SELECT
176-
source_12377422807768256314.*
179+
source_13063992259633584610.*
177180
FROM
178-
source_12377422807768256314
181+
source_13063992259633584610
179182
ORDER BY
180-
source_12377422807768256314.b
181-
"
183+
source_13063992259633584610.b
184+
"#
182185
)
183186
.unwrap(),
184187
)

graph/src/amp/sql/query_builder/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use self::{
2222
event_signature_resolver::resolve_event_signatures,
2323
parser::parse_query,
2424
source_address_resolver::resolve_source_address,
25-
table_extractor::{extract_tables, normalize_table},
25+
table_extractor::{extract_tables, TableReference},
2626
table_validator::validate_tables,
2727
};
2828

graph/src/amp/sql/query_builder/table_extractor.rs

Lines changed: 94 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,84 @@
1-
use std::{collections::BTreeSet, ops::ControlFlow};
1+
use std::{collections::BTreeSet, fmt, ops::ControlFlow};
22

3-
use itertools::Itertools;
43
use sqlparser_latest::ast::{self, Visit, Visitor};
54

65
/// Returns all tables that are referenced by the SQL query.
76
///
87
/// The table names are lowercased and quotes are ignored.
9-
pub(super) fn extract_tables(query: &ast::Query) -> BTreeSet<String> {
8+
pub(super) fn extract_tables(query: &ast::Query) -> BTreeSet<TableReference> {
109
let mut table_extractor = TableExtractor::new();
1110
let _: ControlFlow<()> = Visit::visit(query, &mut table_extractor);
1211

1312
table_extractor.tables
1413
}
1514

16-
/// Returns the normalized table name.
15+
/// Contains a normalized table reference.
1716
///
18-
/// The table name is lowercased and quotes are ignored.
19-
pub(super) fn normalize_table(object_name: &ast::ObjectName) -> String {
20-
object_name
21-
.0
22-
.iter()
23-
.map(|part| match part {
24-
ast::ObjectNamePart::Identifier(ident) => ident.value.to_lowercase(),
25-
})
26-
.join(".")
17+
/// Used to compare physical table references with CTE names and custom tables.
18+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
19+
pub(super) struct TableReference(ast::ObjectName);
20+
21+
impl TableReference {
22+
const QUOTE_STYLE: char = '"';
23+
24+
/// Creates a new table reference from a custom dataset and table.
25+
pub(super) fn new(dataset: &str, table: &str) -> Self {
26+
Self(
27+
vec![
28+
ast::Ident::with_quote(Self::QUOTE_STYLE, dataset),
29+
ast::Ident::with_quote(Self::QUOTE_STYLE, table),
30+
]
31+
.into(),
32+
)
33+
}
34+
35+
/// Creates a new table reference from an object name.
36+
pub(super) fn with_object_name(object_name: &ast::ObjectName) -> Self {
37+
Self::with_idents(
38+
object_name
39+
.0
40+
.iter()
41+
.map(|object_name_part| match object_name_part {
42+
ast::ObjectNamePart::Identifier(ident) => ident,
43+
}),
44+
)
45+
}
46+
47+
/// Creates a new table reference from a list of identifiers.
48+
pub(super) fn with_idents<'a>(idents: impl IntoIterator<Item = &'a ast::Ident>) -> Self {
49+
Self(
50+
idents
51+
.into_iter()
52+
.map(|ident| {
53+
let ast::Ident {
54+
value,
55+
quote_style,
56+
span: _,
57+
} = ident;
58+
59+
ast::Ident::with_quote(Self::QUOTE_STYLE, {
60+
if quote_style.is_none() {
61+
value.to_lowercase()
62+
} else {
63+
value.to_owned()
64+
}
65+
})
66+
})
67+
.collect::<Vec<_>>()
68+
.into(),
69+
)
70+
}
71+
}
72+
73+
impl fmt::Display for TableReference {
74+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75+
write!(f, "{}", self.0)
76+
}
2777
}
2878

2979
/// Visits the SQL query AST and extracts referenced table names, ignoring CTEs.
3080
struct TableExtractor {
31-
tables: BTreeSet<String>,
81+
tables: BTreeSet<TableReference>,
3282
cte_stack: CteStack,
3383
}
3484

@@ -47,13 +97,12 @@ impl TableExtractor {
4797
return;
4898
};
4999

50-
let table = normalize_table(name);
51-
52-
if self.cte_stack.contains(&table) {
100+
let table_reference = TableReference::with_object_name(name);
101+
if self.cte_stack.contains(&table_reference) {
53102
return;
54103
}
55104

56-
self.tables.insert(table);
105+
self.tables.insert(table_reference);
57106
}
58107
}
59108

@@ -81,7 +130,7 @@ impl Visitor for TableExtractor {
81130

82131
/// Maintains a list of active CTEs for each subquery scope.
83132
struct CteStack {
84-
stack: Vec<BTreeSet<String>>,
133+
stack: Vec<BTreeSet<TableReference>>,
85134
}
86135

87136
impl CteStack {
@@ -90,9 +139,11 @@ impl CteStack {
90139
Self { stack: Vec::new() }
91140
}
92141

93-
/// Returns `true` if the `table_name` is present in the CTE list at any scope.
94-
fn contains(&self, table_name: &str) -> bool {
95-
self.stack.iter().any(|scope| scope.contains(table_name))
142+
/// Returns `true` if the `table_reference` is present in the CTE list at any scope.
143+
fn contains(&self, table_reference: &TableReference) -> bool {
144+
self.stack
145+
.iter()
146+
.any(|scope| scope.contains(table_reference))
96147
}
97148

98149
/// Creates a new subquery scope with all the CTEs of the current `query`.
@@ -101,7 +152,7 @@ impl CteStack {
101152
Some(with) => with
102153
.cte_tables
103154
.iter()
104-
.map(|cte_table| cte_table.alias.name.value.to_lowercase())
155+
.map(|cte_table| TableReference::with_idents([&cte_table.alias.name]))
105156
.collect(),
106157
None => BTreeSet::new(),
107158
};
@@ -126,28 +177,31 @@ mod tests {
126177
#[test]
127178
fn $name() {
128179
let query = parse_query($input).unwrap();
129-
assert_eq!(extract_tables(&query), $expected.into_iter().map(Into::into).collect());
180+
assert_eq!(
181+
extract_tables(&query).into_iter().map(|table| table.to_string()).collect::<Vec<_>>(),
182+
$expected.into_iter().map(|table| table.to_string()).collect::<Vec<_>>()
183+
);
130184
}
131185
)*
132186
};
133187
}
134188

135189
test_extract_tables! {
136-
one_table: "SELECT a FROM b" => ["b"],
137-
multiple_tables_with_one_join: "SELECT a FROM b JOIN c ON c.c = b.b" => ["b", "c"],
138-
multiple_tables_with_multiple_joins: "SELECT a FROM b JOIN c ON c.c = b.b JOIN d ON d.d = b.b" => ["b", "c", "d"],
139-
one_table_with_one_cte: "WITH a AS (SELECT * FROM b) SELECT * FROM a" => ["b"],
140-
one_table_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM a) SELECT * FROM c" => ["b"],
141-
multiple_tables_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM d) SELECT * FROM a JOIN c ON c.c = a.a" => ["b", "d"],
142-
multiple_tables_with_nested_ctes: "WITH a AS (WITH b AS (SELECT * FROM c) SELECT * FROM d JOIN b ON b.b = d.d) SELECT * FROM a" => ["c", "d"],
143-
multiple_tables_with_union: "SELECT a FROM b UNION SELECT c FROM d" => ["b", "d"],
144-
multiple_tables_with_union_all: "SELECT a FROM b UNION ALL SELECT c FROM d" => ["b", "d"],
145-
146-
namespace_is_preserved: "SELECT a FROM b.c" => ["b.c"],
147-
catalog_is_preserved: "SELECT a FROM b.c.d" => ["b.c.d"],
148-
tables_are_lowercased: "SELECT a FROM B.C" => ["b.c"],
149-
single_quotes_in_tables_are_ignored: "SELECT a FROM 'B'.'C'" => ["b.c"],
150-
double_quotes_in_tables_are_ignored: r#"SELECT a FROM "B"."C""# => ["b.c"],
151-
backticks_in_tables_are_ignored: "SELECT a FROM `B`.`C`" => ["b.c"],
190+
one_table: "SELECT a FROM b" => [r#""b""#],
191+
multiple_tables_with_one_join: "SELECT a FROM b JOIN c ON c.c = b.b" => [r#""b""#, r#""c""#],
192+
multiple_tables_with_multiple_joins: "SELECT a FROM b JOIN c ON c.c = b.b JOIN d ON d.d = b.b" => [r#""b""#, r#""c""#, r#""d""#],
193+
one_table_with_one_cte: "WITH a AS (SELECT * FROM b) SELECT * FROM a" => [r#""b""#],
194+
one_table_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM a) SELECT * FROM c" => [r#""b""#],
195+
multiple_tables_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM d) SELECT * FROM a JOIN c ON c.c = a.a" => [r#""b""#, r#""d""#],
196+
multiple_tables_with_nested_ctes: "WITH a AS (WITH b AS (SELECT * FROM c) SELECT * FROM d JOIN b ON b.b = d.d) SELECT * FROM a" => [r#""c""#, r#""d""#],
197+
multiple_tables_with_union: "SELECT a FROM b UNION SELECT c FROM d" => [r#""b""#, r#""d""#],
198+
multiple_tables_with_union_all: "SELECT a FROM b UNION ALL SELECT c FROM d" => [r#""b""#, r#""d""#],
199+
200+
namespace_is_preserved: "SELECT a FROM b.c" => [r#""b"."c""#],
201+
catalog_is_preserved: "SELECT a FROM b.c.d" => [r#""b"."c"."d""#],
202+
unquoted_tables_are_lowercased: "SELECT a FROM B.C" => [r#""b"."c""#],
203+
single_quotes_in_tables_are_converted_to_double_quotes: "SELECT a FROM 'B'.'C'" => [r#""B"."C""#],
204+
double_quotes_in_tables_are_preserved: r#"SELECT a FROM "B"."C""# => [r#""B"."C""#],
205+
backticks_in_tables_are_converted_to_double_quotes: "SELECT a FROM `B`.`C`" => [r#""B"."C""#],
152206
}
153207
}

0 commit comments

Comments
 (0)