Merge pull request #22251 from mjibson/pretty
sql-pretty: add pretty printing crate
maddyblue authored Oct 10, 2023
2 parents f347423 + c7ec88e commit 22a8740
Showing 11 changed files with 1,022 additions and 118 deletions.
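
The new src/sql-pretty crate pulls in the third-party `pretty` crate, which is why deny.toml below gains an exception for the `arrayvec 0.5.2` version that `pretty` pins. For orientation only, here is a minimal sketch of the Wadler-style layout model that `pretty` exposes; the function names and the SQL fragment are illustrative and are not the mz-sql-pretty API, which is not shown in this diff. Grouped documents render flat when they fit in the target width and break with indentation when they do not.

use pretty::RcDoc;

fn comma_separated<'a>(items: &'a [&'a str]) -> RcDoc<'a> {
    let mut doc = RcDoc::nil();
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            // RcDoc::line() collapses to a single space when the enclosing group fits.
            doc = doc.append(RcDoc::text(",")).append(RcDoc::line());
        }
        doc = doc.append(RcDoc::text(*item));
    }
    doc
}

fn select_doc<'a>(columns: &'a [&'a str], table: &'a str) -> RcDoc<'a> {
    RcDoc::text("SELECT")
        .append(RcDoc::line().append(comma_separated(columns)).nest(4).group())
        .append(RcDoc::line())
        .append(RcDoc::text("FROM"))
        .append(RcDoc::line().append(RcDoc::text(table)).nest(4))
        .group()
}

fn render(doc: &RcDoc<'_>, width: usize) -> String {
    let mut out = Vec::new();
    doc.render(width, &mut out).expect("rendering to a Vec never fails");
    String::from_utf8(out).expect("documents contain valid UTF-8")
}

fn main() {
    let doc = select_doc(&["a", "b", "c"], "t");
    println!("{}", render(&doc, 80)); // wide width: SELECT a, b, c FROM t
    println!("{}", render(&doc, 10)); // narrow width: the columns and the table break onto indented lines
}
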
37 changes: 36 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -69,6 +69,7 @@ members = [
"src/sql",
"src/sql-lexer",
"src/sql-parser",
"src/sql-pretty",
"src/sqllogictest",
"src/stash",
"src/stash-debug",
3 changes: 3 additions & 0 deletions deny.toml
@@ -58,6 +58,9 @@ skip = [
# our crates use `bitflags 1.3.2`
# TODO: fork `tower-http` and swap to use older bitflags
{ name = "bitflags", version = "1.3.2" },

# `pretty` explicitly chose to use this older version.
{ name = "arrayvec", version = "0.5.2" },
]

# Use `tracing` instead.
6 changes: 4 additions & 2 deletions src/sql-parser/Cargo.toml
@@ -9,6 +9,7 @@ publish = false

[dependencies]
bytesize = "1.1.0"
datadriven = { version = "0.6.0", optional = true }
enum-kinds = "0.5.1"
itertools = "0.10.5"
mz-ore = { path = "../ore", default-features = false, features = ["stack"] }
@@ -17,12 +18,12 @@ phf = { version = "0.11.1", features = ["uncased"] }
serde = { version = "1.0.152", features = ["derive"] }
tracing = "0.1.37"
uncased = "0.9.7"
unicode-width = { version = "0.1.10", optional = true }
workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true }

[dev-dependencies]
datadriven = "0.6.0"
mz-ore = { path = "../ore", default-features = false, features = ["test"] }
unicode-width = "0.1.10"
mz-sql-parser = { path = ".", features = ["test"] }

[build-dependencies]
anyhow = "1.0.66"
@@ -31,6 +32,7 @@ mz-walkabout = { path = "../walkabout", default-features = false }

[features]
default = ["workspace-hack"]
test = ["datadriven", "unicode-width"]

[package.metadata.cargo-udeps.ignore]
normal = ["workspace-hack"]
115 changes: 115 additions & 0 deletions src/sql-parser/src/lib.rs
@@ -105,3 +105,118 @@

pub mod ast;
pub mod parser;

#[cfg(feature = "test")]
pub fn datadriven_testcase(tc: &datadriven::TestCase) -> String {
use crate::ast::display::AstDisplay;
use crate::ast::{Expr, Statement};
use datadriven::TestCase;
use mz_ore::collections::CollectionExt;
use mz_ore::fmt::FormatBuffer;
use unicode_width::UnicodeWidthStr;

fn render_error(sql: &str, e: parser::ParserError) -> String {
let mut s = format!("error: {}\n", e.message);

// Do our best to emulate psql in rendering a caret pointing at the
// offending character in the query. This makes it possible to detect
// incorrect error positions by visually scanning the test files.
let end = sql.len();
let line_start = sql[..e.pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
let line_end = sql[e.pos..].find('\n').map(|p| e.pos + p).unwrap_or(end);
writeln!(s, "{}", &sql[line_start..line_end]);
for _ in 0..sql[line_start..e.pos].width() {
write!(s, " ");
}
writeln!(s, "^");

s
}

fn parse_statement(tc: &TestCase) -> String {
let input = tc.input.strip_suffix('\n').unwrap_or(&tc.input);
match parser::parse_statements(input) {
Ok(s) => {
if s.len() != 1 {
return "expected exactly one statement\n".to_string();
}
let stmt = s.into_element().ast;
for printed in [stmt.to_ast_string(), stmt.to_ast_string_stable()] {
let mut parsed = match parser::parse_statements(&printed) {
Ok(parsed) => parsed.into_element().ast,
Err(err) => panic!("reparse failed: {}: {}\n", stmt, err),
};
match (&mut parsed, &stmt) {
// DECLARE remembers the original SQL. Erase that here so it can differ if
// needed (for example, quoting identifiers vs not). This is ok because we
// still compare that the resulting ASTs are identical, and it's valid for
// those to come from different original strings.
(Statement::Declare(parsed), Statement::Declare(stmt)) => {
parsed.sql = stmt.sql.clone();
}
_ => {}
}
if parsed != stmt {
panic!(
"reparse comparison failed:\n{:?}\n!=\n{:?}\n{printed}\n",
stmt, parsed
);
}
}
if tc.args.get("roundtrip").is_some() {
format!("{}\n", stmt)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{}\n=>\n{:?}\n", stmt, stmt)
}
}
Err(e) => render_error(input, e.error),
}
}

fn parse_scalar(tc: &TestCase) -> String {
let input = tc.input.trim();
match parser::parse_expr(input) {
Ok(s) => {
for printed in [s.to_ast_string(), s.to_ast_string_stable()] {
match parser::parse_expr(&printed) {
Ok(parsed) => {
// TODO: We always coerce the double colon operator into a Cast expr instead
// of keeping it as an Op (see parse_pg_cast). Expr::Cast always prints
// itself as double colon. We're thus unable to perfectly roundtrip
// `CAST(..)`. We could fix this by keeping "::" as a binary operator and
// teaching func.rs how to handle it, similar to how that file handles "~~"
// (without the parser converting that operator directly into an
// Expr::Like).
if !matches!(parsed, Expr::Cast { .. }) {
if parsed != s {
panic!(
"reparse comparison failed: {input} != {s}\n{:?}\n!=\n{:?}\n{printed}\n",
s, parsed
);
}
}
}
Err(err) => panic!("reparse failed: {printed}: {err}\n{s:?}"),
}
}

if tc.args.get("roundtrip").is_some() {
format!("{}\n", s)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{:?}\n", s)
}
}
Err(e) => render_error(input, e),
}
}

match tc.directive.as_str() {
"parse-statement" => parse_statement(tc),
"parse-scalar" => parse_scalar(tc),
dir => panic!("unhandled directive {}", dir),
}
}
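
With the round-trip checks hoisted into the library behind the new `test` feature, other crates (presumably including the tests of the new sql-pretty crate) can drive the same datadriven corpus: each case pairs a parse-statement or parse-scalar directive and its input SQL with the expected output after datadriven's `----` separator, optionally with a roundtrip argument. A sketch of how a downstream test might reuse the exported helper, assuming it declares mz-sql-parser with features = ["test"] as a dev-dependency; the test path and test name are illustrative, and a plain #[test] stands in for the repository's #[mz_ore::test] wrapper.

use datadriven::walk;
use mz_sql_parser::datadriven_testcase;

#[test]
fn parser_roundtrips() {
    // Walk every file under the (illustrative) tests/testdata directory and
    // hand each test case to the shared parser round-trip checker.
    walk("tests/testdata", |f| f.run(datadriven_testcase));
}
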
120 changes: 5 additions & 115 deletions src/sql-parser/tests/sqlparser_common.rs
@@ -89,131 +89,21 @@
use std::error::Error;
use std::iter;

use datadriven::walk;
use itertools::Itertools;
use mz_ore::collections::CollectionExt;
use mz_ore::fmt::FormatBuffer;
use mz_sql_parser::ast::display::AstDisplay;
use mz_sql_parser::ast::visit::Visit;
use mz_sql_parser::ast::visit_mut::{self, VisitMut};
use mz_sql_parser::ast::{AstInfo, Expr, Ident, Raw, RawDataType, RawItemName, Statement};
use mz_sql_parser::ast::{AstInfo, Expr, Ident, Raw, RawDataType, RawItemName};
use mz_sql_parser::datadriven_testcase;
use mz_sql_parser::parser::{
self, parse_statements, parse_statements_with_limit, ParserError, MAX_STATEMENT_BATCH_SIZE,
self, parse_statements, parse_statements_with_limit, MAX_STATEMENT_BATCH_SIZE,
};
use unicode_width::UnicodeWidthStr;

#[mz_ore::test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
fn datadriven() {
use datadriven::{walk, TestCase};

fn render_error(sql: &str, e: ParserError) -> String {
let mut s = format!("error: {}\n", e.message);

// Do our best to emulate psql in rendering a caret pointing at the
// offending character in the query. This makes it possible to detect
// incorrect error positions by visually scanning the test files.
let end = sql.len();
let line_start = sql[..e.pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
let line_end = sql[e.pos..].find('\n').map(|p| e.pos + p).unwrap_or(end);
writeln!(s, "{}", &sql[line_start..line_end]);
for _ in 0..sql[line_start..e.pos].width() {
write!(s, " ");
}
writeln!(s, "^");

s
}

fn parse_statement(tc: &TestCase) -> String {
let input = tc.input.strip_suffix('\n').unwrap_or(&tc.input);
match parser::parse_statements(input) {
Ok(s) => {
if s.len() != 1 {
return "expected exactly one statement\n".to_string();
}
let stmt = s.into_element().ast;
for printed in [stmt.to_ast_string(), stmt.to_ast_string_stable()] {
let mut parsed = match parser::parse_statements(&printed) {
Ok(parsed) => parsed.into_element().ast,
Err(err) => panic!("reparse failed: {}: {}\n", stmt, err),
};
match (&mut parsed, &stmt) {
// DECLARE remembers the original SQL. Erase that here so it can differ if
// needed (for example, quoting identifiers vs not). This is ok because we
// still compare that the resulting ASTs are identical, and it's valid for
// those to come from different original strings.
(Statement::Declare(parsed), Statement::Declare(stmt)) => {
parsed.sql = stmt.sql.clone();
}
_ => {}
}
if parsed != stmt {
panic!(
"reparse comparison failed:\n{:?}\n!=\n{:?}\n{printed}\n",
stmt, parsed
);
}
}
if tc.args.get("roundtrip").is_some() {
format!("{}\n", stmt)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{}\n=>\n{:?}\n", stmt, stmt)
}
}
Err(e) => render_error(input, e.error),
}
}

fn parse_scalar(tc: &TestCase) -> String {
let input = tc.input.trim();
match parser::parse_expr(input) {
Ok(s) => {
for printed in [s.to_ast_string(), s.to_ast_string_stable()] {
match parser::parse_expr(&printed) {
Ok(parsed) => {
// TODO: We always coerce the double colon operator into a Cast expr instead
// of keeping it as an Op (see parse_pg_cast). Expr::Cast always prints
// itself as double colon. We're thus unable to perfectly roundtrip
// `CAST(..)`. We could fix this by keeping "::" as a binary operator and
// teaching func.rs how to handle it, similar to how that file handles "~~"
// (without the parser converting that operator directly into an
// Expr::Like).
if !matches!(parsed, Expr::Cast { .. }) {
if parsed != s {
panic!(
"reparse comparison failed: {input} != {s}\n{:?}\n!=\n{:?}\n{printed}\n",
s, parsed
);
}
}
}
Err(err) => panic!("reparse failed: {printed}: {err}\n{s:?}"),
}
}

if tc.args.get("roundtrip").is_some() {
format!("{}\n", s)
} else {
// TODO(justin): it would be nice to have a middle-ground between this
// all-on-one-line and {:#?}'s huge number of lines.
format!("{:?}\n", s)
}
}
Err(e) => render_error(input, e),
}
}

walk("tests/testdata", |f| {
f.run(|test_case| -> String {
match test_case.directive.as_str() {
"parse-statement" => parse_statement(test_case),
"parse-scalar" => parse_scalar(test_case),
dir => panic!("unhandled directive {}", dir),
}
})
});
walk("tests/testdata", |f| f.run(datadriven_testcase));
}

#[mz_ore::test]

