1- use std:: { collections:: BTreeSet , ops:: ControlFlow } ;
1+ use std:: { collections:: BTreeSet , fmt , ops:: ControlFlow } ;
22
3- use itertools:: Itertools ;
43use sqlparser_latest:: ast:: { self , Visit , Visitor } ;
54
65/// Returns all tables that are referenced by the SQL query.
76///
87/// Unquoted table names are lowercased; quoted names keep their case and every quote style is normalized to double quotes.
9- pub ( super ) fn extract_tables ( query : & ast:: Query ) -> BTreeSet < String > {
8+ pub ( super ) fn extract_tables ( query : & ast:: Query ) -> BTreeSet < TableReference > {
109 let mut table_extractor = TableExtractor :: new ( ) ;
1110 let _: ControlFlow < ( ) > = Visit :: visit ( query, & mut table_extractor) ;
1211
1312 table_extractor. tables
1413}
1514
16- /// Returns the normalized table name .
15+ /// Contains a normalized table reference .
1716///
18- /// The table name is lowercased and quotes are ignored.
19- pub ( super ) fn normalize_table ( object_name : & ast:: ObjectName ) -> String {
20- object_name
21- . 0
22- . iter ( )
23- . map ( |part| match part {
24- ast:: ObjectNamePart :: Identifier ( ident) => ident. value . to_lowercase ( ) ,
25- } )
26- . join ( "." )
17+ /// Used to compare physical table references with CTE names and custom tables.
18+ #[ derive( Debug , Clone , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
19+ pub ( super ) struct TableReference ( ast:: ObjectName ) ;
20+
21+ impl TableReference {
22+ const QUOTE_STYLE : char = '"' ;
23+
24+ /// Creates a new table reference from a custom dataset and table.
25+ pub ( super ) fn new ( dataset : & str , table : & str ) -> Self {
26+ Self (
27+ vec ! [
28+ ast:: Ident :: with_quote( Self :: QUOTE_STYLE , dataset) ,
29+ ast:: Ident :: with_quote( Self :: QUOTE_STYLE , table) ,
30+ ]
31+ . into ( ) ,
32+ )
33+ }
34+
35+ /// Creates a new table reference from an object name.
36+ pub ( super ) fn with_object_name ( object_name : & ast:: ObjectName ) -> Self {
37+ Self :: with_idents (
38+ object_name
39+ . 0
40+ . iter ( )
41+ . map ( |object_name_part| match object_name_part {
42+ ast:: ObjectNamePart :: Identifier ( ident) => ident,
43+ } ) ,
44+ )
45+ }
46+
47+ /// Creates a new table reference from a list of identifiers.
48+ pub ( super ) fn with_idents < ' a > ( idents : impl IntoIterator < Item = & ' a ast:: Ident > ) -> Self {
49+ Self (
50+ idents
51+ . into_iter ( )
52+ . map ( |ident| {
53+ let ast:: Ident {
54+ value,
55+ quote_style,
56+ span : _,
57+ } = ident;
58+
59+ ast:: Ident :: with_quote ( Self :: QUOTE_STYLE , {
60+ if quote_style. is_none ( ) {
61+ value. to_lowercase ( )
62+ } else {
63+ value. to_owned ( )
64+ }
65+ } )
66+ } )
67+ . collect :: < Vec < _ > > ( )
68+ . into ( ) ,
69+ )
70+ }
71+ }
72+
73+ impl fmt:: Display for TableReference {
74+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
75+ write ! ( f, "{}" , self . 0 )
76+ }
2777}
2878
2979/// Visits the SQL query AST and extracts referenced table names, ignoring CTEs.
3080struct TableExtractor {
31- tables : BTreeSet < String > ,
81+ tables : BTreeSet < TableReference > ,
3282 cte_stack : CteStack ,
3383}
3484
@@ -47,13 +97,12 @@ impl TableExtractor {
4797 return ;
4898 } ;
4999
50- let table = normalize_table ( name) ;
51-
52- if self . cte_stack . contains ( & table) {
100+ let table_reference = TableReference :: with_object_name ( name) ;
101+ if self . cte_stack . contains ( & table_reference) {
53102 return ;
54103 }
55104
56- self . tables . insert ( table ) ;
105+ self . tables . insert ( table_reference ) ;
57106 }
58107}
59108
@@ -81,7 +130,7 @@ impl Visitor for TableExtractor {
81130
82131/// Maintains a list of active CTEs for each subquery scope.
83132struct CteStack {
84- stack : Vec < BTreeSet < String > > ,
133+ stack : Vec < BTreeSet < TableReference > > ,
85134}
86135
87136impl CteStack {
@@ -90,9 +139,11 @@ impl CteStack {
90139 Self { stack : Vec :: new ( ) }
91140 }
92141
93- /// Returns `true` if the `table_name` is present in the CTE list at any scope.
94- fn contains ( & self , table_name : & str ) -> bool {
95- self . stack . iter ( ) . any ( |scope| scope. contains ( table_name) )
142+ /// Returns `true` if the `table_reference` is present in the CTE list at any scope.
143+ fn contains ( & self , table_reference : & TableReference ) -> bool {
144+ self . stack
145+ . iter ( )
146+ . any ( |scope| scope. contains ( table_reference) )
96147 }
97148
98149 /// Creates a new subquery scope with all the CTEs of the current `query`.
@@ -101,7 +152,7 @@ impl CteStack {
101152 Some ( with) => with
102153 . cte_tables
103154 . iter ( )
104- . map ( |cte_table| cte_table. alias . name . value . to_lowercase ( ) )
155+ . map ( |cte_table| TableReference :: with_idents ( [ & cte_table. alias . name ] ) )
105156 . collect ( ) ,
106157 None => BTreeSet :: new ( ) ,
107158 } ;
@@ -126,28 +177,31 @@ mod tests {
126177 #[ test]
127178 fn $name( ) {
128179 let query = parse_query( $input) . unwrap( ) ;
129- assert_eq!( extract_tables( & query) , $expected. into_iter( ) . map( Into :: into) . collect( ) ) ;
180+ assert_eq!(
181+ extract_tables( & query) . into_iter( ) . map( |table| table. to_string( ) ) . collect:: <Vec <_>>( ) ,
182+ $expected. into_iter( ) . map( |table| table. to_string( ) ) . collect:: <Vec <_>>( )
183+ ) ;
130184 }
131185 ) *
132186 } ;
133187 }
134188
135189 test_extract_tables ! {
136- one_table: "SELECT a FROM b" => [ "b" ] ,
137- multiple_tables_with_one_join: "SELECT a FROM b JOIN c ON c.c = b.b" => [ "b" , "c" ] ,
138- multiple_tables_with_multiple_joins: "SELECT a FROM b JOIN c ON c.c = b.b JOIN d ON d.d = b.b" => [ "b" , "c" , "d" ] ,
139- one_table_with_one_cte: "WITH a AS (SELECT * FROM b) SELECT * FROM a" => [ "b" ] ,
140- one_table_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM a) SELECT * FROM c" => [ "b" ] ,
141- multiple_tables_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM d) SELECT * FROM a JOIN c ON c.c = a.a" => [ "b" , "d" ] ,
142- multiple_tables_with_nested_ctes: "WITH a AS (WITH b AS (SELECT * FROM c) SELECT * FROM d JOIN b ON b.b = d.d) SELECT * FROM a" => [ "c" , "d" ] ,
143- multiple_tables_with_union: "SELECT a FROM b UNION SELECT c FROM d" => [ "b" , "d" ] ,
144- multiple_tables_with_union_all: "SELECT a FROM b UNION ALL SELECT c FROM d" => [ "b" , "d" ] ,
145-
146- namespace_is_preserved: "SELECT a FROM b.c" => [ "b.c" ] ,
147- catalog_is_preserved: "SELECT a FROM b.c.d" => [ "b.c.d" ] ,
148- tables_are_lowercased : "SELECT a FROM B.C" => [ "b.c" ] ,
149- single_quotes_in_tables_are_ignored : "SELECT a FROM 'B'.'C'" => [ "b.c" ] ,
150- double_quotes_in_tables_are_ignored : r#"SELECT a FROM "B"."C""# => [ "b.c" ] ,
151- backticks_in_tables_are_ignored : "SELECT a FROM `B`.`C`" => [ "b.c" ] ,
190+ one_table: "SELECT a FROM b" => [ r#""b""# ] ,
191+ multiple_tables_with_one_join: "SELECT a FROM b JOIN c ON c.c = b.b" => [ r#""b""# , r#""c""# ] ,
192+ multiple_tables_with_multiple_joins: "SELECT a FROM b JOIN c ON c.c = b.b JOIN d ON d.d = b.b" => [ r#""b""# , r#""c""# , r#""d""# ] ,
193+ one_table_with_one_cte: "WITH a AS (SELECT * FROM b) SELECT * FROM a" => [ r#""b""# ] ,
194+ one_table_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM a) SELECT * FROM c" => [ r#""b""# ] ,
195+ multiple_tables_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM d) SELECT * FROM a JOIN c ON c.c = a.a" => [ r#""b""# , r#""d""# ] ,
196+ multiple_tables_with_nested_ctes: "WITH a AS (WITH b AS (SELECT * FROM c) SELECT * FROM d JOIN b ON b.b = d.d) SELECT * FROM a" => [ r#""c""# , r#""d""# ] ,
197+ multiple_tables_with_union: "SELECT a FROM b UNION SELECT c FROM d" => [ r#""b""# , r#""d""# ] ,
198+ multiple_tables_with_union_all: "SELECT a FROM b UNION ALL SELECT c FROM d" => [ r#""b""# , r#""d""# ] ,
199+
200+ namespace_is_preserved: "SELECT a FROM b.c" => [ r#""b"."c""# ] ,
201+ catalog_is_preserved: "SELECT a FROM b.c.d" => [ r#""b"."c"."d""# ] ,
202+ unquoted_tables_are_lowercased : "SELECT a FROM B.C" => [ r#""b"."c""# ] ,
203+ single_quotes_in_tables_are_converted_to_double_quotes : "SELECT a FROM 'B'.'C'" => [ r#""B"."C""# ] ,
204+ double_quotes_in_tables_are_preserved : r#"SELECT a FROM "B"."C""# => [ r#""B"."C""# ] ,
205+ backticks_in_tables_are_converted_to_double_quotes : "SELECT a FROM `B`.`C`" => [ r#""B"."C""# ] ,
152206 }
153207}
0 commit comments