Skip to content

Commit 37c8768

Browse files
authored
BE-187: Refactor SQL query compiler with Identifier types and improved JOIN handling (#7951)
1 parent 25ed5e9 commit 37c8768

File tree

19 files changed

+923
-357
lines changed

19 files changed

+923
-357
lines changed

libs/@local/graph/postgres-store/src/store/postgres/query/compile.rs

Lines changed: 172 additions & 174 deletions
Large diffs are not rendered by default.

libs/@local/graph/postgres-store/src/store/postgres/query/condition.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::store::postgres::query::{Expression, Transpile};
55
/// A [`Filter`], which can be transpiled.
66
///
77
/// [`Filter`]: hash_graph_store::filter::Filter
8-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
8+
#[derive(Debug, Clone, PartialEq, Eq)]
99
pub enum Condition {
1010
All(Vec<Self>),
1111
Any(Vec<Self>),
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
use core::fmt::{self, Write as _};
2+
3+
use super::{identifier::Identifier, table_reference::TableReference};
4+
use crate::store::postgres::query::{Column, Transpile};
5+
6+
/// A column name in a PostgreSQL query.
7+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
8+
pub enum ColumnName<'name> {
9+
/// A dynamically-named column, quoted when transpiled
10+
Named(Identifier<'name>),
11+
/// A statically-defined column from the database schema
12+
Static(Column),
13+
/// The asterisk wildcard (`*`)
14+
Asterisk,
15+
}
16+
17+
/// A reference to a column, optionally qualified with a table reference.
18+
///
19+
/// Transpiles to `<table>.<column>` or just `<column>` if no table qualification is present.
20+
#[derive(Clone, PartialEq, Eq, Hash)]
21+
pub struct ColumnReference<'name> {
22+
pub correlation: Option<TableReference<'name>>,
23+
pub name: ColumnName<'name>,
24+
}
25+
26+
impl fmt::Debug for ColumnReference<'_> {
27+
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
28+
self.transpile(fmt)
29+
}
30+
}
31+
32+
impl Transpile for ColumnReference<'_> {
33+
fn transpile(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
34+
if let Some(correlation) = &self.correlation {
35+
correlation.transpile(fmt)?;
36+
fmt.write_char('.')?;
37+
}
38+
39+
match &self.name {
40+
ColumnName::Named(name) => name.transpile(fmt),
41+
ColumnName::Static(column) => column.transpile(fmt),
42+
ColumnName::Asterisk => fmt.write_char('*'),
43+
}
44+
}
45+
}
46+
47+
#[cfg(test)]
48+
mod tests {
49+
use super::*;
50+
use crate::store::postgres::query::expression::table_reference::{SchemaReference, TableName};
51+
52+
#[test]
53+
fn simple_column_reference() {
54+
let col_ref = ColumnReference {
55+
correlation: None,
56+
name: ColumnName::Named(Identifier::from("username")),
57+
};
58+
assert_eq!(col_ref.transpile_to_string(), r#""username""#);
59+
}
60+
61+
#[test]
62+
fn qualified_column_reference() {
63+
let col_ref = ColumnReference {
64+
correlation: Some(TableReference {
65+
schema: None,
66+
name: TableName::from("users"),
67+
alias: None,
68+
}),
69+
name: ColumnName::Named(Identifier::from("username")),
70+
};
71+
assert_eq!(col_ref.transpile_to_string(), r#""users"."username""#);
72+
}
73+
74+
#[test]
75+
fn fully_qualified_column_reference() {
76+
let col_ref = ColumnReference {
77+
correlation: Some(TableReference {
78+
schema: Some(SchemaReference {
79+
database: Some(Identifier::from("mydb")),
80+
name: Identifier::from("public"),
81+
}),
82+
name: TableName::from("users"),
83+
alias: None,
84+
}),
85+
name: ColumnName::Named(Identifier::from("username")),
86+
};
87+
assert_eq!(
88+
col_ref.transpile_to_string(),
89+
r#""mydb"."public"."users"."username""#
90+
);
91+
}
92+
93+
#[test]
94+
fn column_reference_with_special_chars() {
95+
let col_ref = ColumnReference {
96+
correlation: Some(TableReference {
97+
schema: None,
98+
name: TableName::from("user-table"),
99+
alias: None,
100+
}),
101+
name: ColumnName::Named(Identifier::from("user name")),
102+
};
103+
assert_eq!(col_ref.transpile_to_string(), r#""user-table"."user name""#);
104+
}
105+
106+
#[test]
107+
fn column_reference_with_quotes() {
108+
let col_ref = ColumnReference {
109+
correlation: Some(TableReference {
110+
schema: None,
111+
name: TableName::from(r#"my"table"#),
112+
alias: None,
113+
}),
114+
name: ColumnName::Named(Identifier::from(r#"my"column"#)),
115+
};
116+
assert_eq!(col_ref.transpile_to_string(), r#""my""table"."my""column""#);
117+
}
118+
119+
#[test]
120+
fn asterisk_column_reference() {
121+
let col_ref = ColumnReference {
122+
correlation: None,
123+
name: ColumnName::Asterisk,
124+
};
125+
assert_eq!(col_ref.transpile_to_string(), "*");
126+
}
127+
128+
#[test]
129+
fn qualified_asterisk_column_reference() {
130+
let col_ref = ColumnReference {
131+
correlation: Some(TableReference {
132+
schema: None,
133+
name: TableName::from("users"),
134+
alias: None,
135+
}),
136+
name: ColumnName::Asterisk,
137+
};
138+
assert_eq!(col_ref.transpile_to_string(), r#""users".*"#);
139+
}
140+
}

libs/@local/graph/postgres-store/src/store/postgres/query/expression/conditional.rs

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ use core::fmt::{
44

55
use hash_graph_store::filter::PathToken;
66

7+
use super::ColumnReference;
78
use crate::store::postgres::query::{
8-
Alias, AliasedTable, Column, SelectStatement, Table, Transpile, WindowStatement,
9-
table::DatabaseColumn as _,
9+
Alias, Column, SelectStatement, Table, Transpile, WindowStatement, table::DatabaseColumn as _,
1010
};
1111

12-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12+
#[derive(Debug, Clone, PartialEq, Eq)]
1313
pub enum Function {
1414
Min(Box<Expression>),
1515
Max(Box<Expression>),
@@ -157,11 +157,11 @@ impl Transpile for PostgresType {
157157
}
158158

159159
/// A compiled expression in Postgres.
160-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
160+
#[derive(Debug, Clone, PartialEq, Eq)]
161161
pub enum Expression {
162162
Asterisk,
163-
Column(Column),
164-
ColumnReference {
163+
ColumnReference(ColumnReference<'static>),
164+
AliasedColumn {
165165
column: Column,
166166
table_alias: Option<Alias>,
167167
},
@@ -186,26 +186,22 @@ impl Transpile for Expression {
186186
fn transpile(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
187187
match self {
188188
Self::Asterisk => fmt.write_char('*'),
189-
Self::Column(column) => column.transpile(fmt),
190-
Self::ColumnReference {
189+
Self::ColumnReference(column) => column.transpile(fmt),
190+
Self::AliasedColumn {
191191
column,
192192
table_alias,
193193
} => {
194194
let table = column.table();
195195
if let Some(alias) = *table_alias {
196-
AliasedTable { table, alias }.transpile(fmt)?;
196+
table.aliased(alias).transpile(fmt)?;
197197
} else {
198198
table.transpile(fmt)?;
199199
}
200200
write!(fmt, r#"."{}""#, column.as_str())
201201
}
202202
Self::TableReference { table, alias } => {
203203
if let Some(alias) = *alias {
204-
AliasedTable {
205-
table: *table,
206-
alias,
207-
}
208-
.transpile(fmt)
204+
table.aliased(alias).transpile(fmt)
209205
} else {
210206
table.transpile(fmt)
211207
}
@@ -271,7 +267,7 @@ mod tests {
271267
#[test]
272268
fn transpile_function_expression() {
273269
assert_eq!(
274-
Expression::Function(Function::Min(Box::new(Expression::ColumnReference {
270+
Expression::Function(Function::Min(Box::new(Expression::AliasedColumn {
275271
column: DataTypeQueryPath::Version.terminating_column().0,
276272
table_alias: Some(Alias {
277273
condition_index: 1,

libs/@local/graph/postgres-store/src/store/postgres/query/expression/group_by_clause.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use core::fmt;
22

33
use crate::store::postgres::query::{Expression, Transpile};
44

5-
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
5+
#[derive(Debug, Clone, Default, PartialEq, Eq)]
66
pub struct GroupByExpression {
77
pub expressions: Vec<Expression>,
88
}
@@ -36,15 +36,15 @@ mod tests {
3636
fn order_one() {
3737
let order_by_expression = GroupByExpression {
3838
expressions: vec![
39-
Expression::ColumnReference {
39+
Expression::AliasedColumn {
4040
column: EntityQueryPath::WebId.terminating_column().0,
4141
table_alias: Some(Alias {
4242
condition_index: 1,
4343
chain_depth: 2,
4444
number: 3,
4545
}),
4646
},
47-
Expression::ColumnReference {
47+
Expression::AliasedColumn {
4848
column: EntityQueryPath::Uuid.terminating_column().0,
4949
table_alias: Some(Alias {
5050
condition_index: 4,
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
use alloc::borrow::Cow;
2+
use core::fmt::{self, Write as _};
3+
4+
use crate::store::postgres::query::Transpile;
5+
6+
/// A PostgreSQL identifier (table name, column name, alias, schema name, etc.).
7+
///
8+
/// Identifiers are **always** quoted using double quotes (`"`) when transpiled to SQL. This
9+
/// ensures:
10+
/// - Protection against SQL injection
11+
/// - Correct handling of SQL reserved keywords (e.g., `user`, `select`)
12+
/// - Support for identifiers with special characters
13+
/// - Preservation of case-sensitivity
14+
///
15+
/// Internal double quotes are escaped by doubling them (`""`) as per PostgreSQL's standard.
16+
#[derive(Clone, PartialEq, Eq, Hash)]
17+
pub struct Identifier<'name> {
18+
name: Cow<'name, str>,
19+
}
20+
21+
impl fmt::Debug for Identifier<'_> {
22+
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
23+
self.transpile(fmt)
24+
}
25+
}
26+
27+
impl<'name> From<&'name str> for Identifier<'name> {
28+
fn from(name: &'name str) -> Self {
29+
Self {
30+
name: Cow::Borrowed(name),
31+
}
32+
}
33+
}
34+
35+
impl From<String> for Identifier<'_> {
36+
fn from(identifier: String) -> Self {
37+
Self {
38+
name: Cow::Owned(identifier),
39+
}
40+
}
41+
}
42+
43+
impl AsRef<str> for Identifier<'_> {
44+
fn as_ref(&self) -> &str {
45+
&self.name
46+
}
47+
}
48+
49+
impl Transpile for Identifier<'_> {
50+
fn transpile(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
51+
fmt.write_char('"')?;
52+
for ch in self.name.chars() {
53+
if ch == '"' {
54+
fmt.write_str("\"\"")?;
55+
} else {
56+
fmt.write_char(ch)?;
57+
}
58+
}
59+
fmt.write_char('"')
60+
}
61+
}
62+
63+
#[cfg(test)]
64+
mod tests {
65+
use super::*;
66+
67+
#[test]
68+
fn simple_identifier() {
69+
let id = Identifier::from("users");
70+
assert_eq!(id.transpile_to_string(), r#""users""#);
71+
}
72+
73+
#[test]
74+
fn keyword_identifier() {
75+
let id = Identifier::from("select");
76+
assert_eq!(id.transpile_to_string(), r#""select""#);
77+
78+
let id = Identifier::from("user");
79+
assert_eq!(id.transpile_to_string(), r#""user""#);
80+
81+
let id = Identifier::from("table");
82+
assert_eq!(id.transpile_to_string(), r#""table""#);
83+
}
84+
85+
#[test]
86+
fn identifier_with_special_chars() {
87+
let id = Identifier::from("my-table");
88+
assert_eq!(id.transpile_to_string(), r#""my-table""#);
89+
90+
let id = Identifier::from("table name");
91+
assert_eq!(id.transpile_to_string(), r#""table name""#);
92+
93+
let id = Identifier::from("123abc");
94+
assert_eq!(id.transpile_to_string(), r#""123abc""#);
95+
}
96+
97+
#[test]
98+
fn identifier_with_quotes() {
99+
// Single internal quote: my"table → "my""table"
100+
let id = Identifier::from(r#"my"table"#);
101+
assert_eq!(id.transpile_to_string(), r#""my""table""#);
102+
103+
// Multiple internal quotes: a"b"c → "a""b""c"
104+
let id = Identifier::from(r#"a"b"c"#);
105+
assert_eq!(id.transpile_to_string(), r#""a""b""c""#);
106+
107+
// Just a quote: " → """"
108+
let id = Identifier::from(r#"""#);
109+
assert_eq!(id.transpile_to_string(), r#""""""#);
110+
}
111+
112+
#[test]
113+
fn case_sensitive_identifier() {
114+
let id = Identifier::from("UsErS");
115+
assert_eq!(id.transpile_to_string(), r#""UsErS""#);
116+
117+
let id = Identifier::from("USERS");
118+
assert_eq!(id.transpile_to_string(), r#""USERS""#);
119+
}
120+
121+
#[test]
122+
fn empty_identifier() {
123+
let id = Identifier::from("");
124+
assert_eq!(id.transpile_to_string(), r#""""#);
125+
}
126+
127+
#[test]
128+
#[expect(clippy::non_ascii_literal)]
129+
fn unicode_identifier() {
130+
let id = Identifier::from("用户表"); // "user table" in Chinese
131+
assert_eq!(id.transpile_to_string(), r#""用户表""#);
132+
133+
let id = Identifier::from("café");
134+
assert_eq!(id.transpile_to_string(), r#""café""#);
135+
}
136+
}

0 commit comments

Comments
 (0)