diff --git a/TODO.md b/TODO.md new file mode 100644 index 000000000..9ad0b2b48 --- /dev/null +++ b/TODO.md @@ -0,0 +1,136 @@ +# TODO: Remaining Parser and Explain Issues + +## Current State + +- **Tests passing:** 5,197 (76.2%) +- **Tests skipped:** 1,627 (23.8%) + - Parser issues: ~675 + - Explain mismatches: ~637 + +## Parser Issues + +These require changes to `parser/parser.go`: + +### Table/Database Names Starting with Numbers +Tables and databases with names starting with digits fail to parse: +```sql +DROP TABLE IF EXISTS 03657_gby_overflow; +DROP DATABASE IF EXISTS 03710_database; +``` + +### FORMAT Null +The `FORMAT Null` clause is not recognized: +```sql +SELECT ... FORMAT Null; +``` + +### FETCH FIRST ... ROW ONLY +SQL standard fetch syntax is not supported: +```sql +SELECT ... FETCH FIRST 1 ROW ONLY; +``` + +### INSERT INTO FUNCTION +Function-based inserts are not supported: +```sql +INSERT INTO FUNCTION file('file.parquet') SELECT ...; +``` + +### WITH ... AS Subquery Aliases +Subquery aliases in FROM clauses with keyword `AS`: +```sql +SELECT * FROM (SELECT 1 x) AS alias; +``` + +### String Concatenation Operator || +The `||` operator in some contexts: +```sql +SELECT currentDatabase() || '_test' AS key; +``` + +### MOD/DIV Operators +The MOD and DIV keywords as operators: +```sql +SELECT number MOD 3, number DIV 3 FROM ...; +``` + +### Reserved Keyword Handling +Keywords like `LEFT`, `RIGHT` used as table aliases: +```sql +SELECT * FROM numbers(10) AS left RIGHT JOIN ...; +``` + +### Parameterized Settings +Settings with `$` parameters: +```sql +SET param_$1 = 'Hello'; +``` + +### Incomplete CASE Expression +CASE without END: +```sql +SELECT CASE number -- missing END +``` + +## Explain Output Issues + +These require changes to `internal/explain/`: + +### Double Equals (==) Operator +The `==` operator creates extra nested equals/tuple nodes: +```sql +SELECT value == '127.0.0.1:9181' +``` +Expected: `Function equals` with `Identifier` and `Literal` 
+Got: Nested `Function equals` with extra `Function tuple` + +### CreateQuery Spacing +Some ClickHouse versions output extra space before `(children`: +``` +CreateQuery d1  (children 1) -- two spaces +CreateQuery d1 (children 1) -- one space (our output) +``` + +### Server Error Messages in Expected Output +Some test expected outputs include trailing messages: +``` +The query succeeded but the server error '42' was expected +``` +These are not part of the actual EXPLAIN output. + +## Lower Priority + +### DateTime64 with Timezone +Type parameters with string timezone: +```sql +DateTime64(3,'UTC') +``` + +### Complex Type Expressions +Nested type expressions in column definitions: +```sql +CREATE TABLE t (c LowCardinality(UUID)); +``` + +### Parameterized Views +View definitions with parameters: +```sql +CREATE VIEW v AS SELECT ... WHERE x={parity:Int8}; +``` + +## Testing Notes + +Run tests with timeout to catch infinite loops: +```bash +go test ./parser -timeout 5s -v +``` + +Count test results: +```bash +go test ./parser -timeout 5s -v 2>&1 | grep -E 'PASS:|SKIP:' | cut -d':' -f1 | sort | uniq -c +``` + +View explain mismatches: +```bash +go test ./parser -timeout 5s -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100 +``` diff --git a/ast/ast.go b/ast/ast.go index 1c86b8b48..2b61012d1 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -315,6 +315,7 @@ type DropQuery struct { Database string `json:"database,omitempty"` Table string `json:"table,omitempty"` View string `json:"view,omitempty"` + User string `json:"user,omitempty"` Temporary bool `json:"temporary,omitempty"` OnCluster string `json:"on_cluster,omitempty"` DropDatabase bool `json:"drop_database,omitempty"` @@ -449,6 +450,7 @@ const ( ShowCreateDB ShowType = "CREATE_DATABASE" ShowColumns ShowType = "COLUMNS" ShowDictionaries ShowType = "DICTIONARIES" + ShowFunctions ShowType = "FUNCTIONS" ) // ExplainQuery represents an EXPLAIN statement. 
diff --git a/internal/explain/explain.go b/internal/explain/explain.go new file mode 100644 index 000000000..e20ee4ed4 --- /dev/null +++ b/internal/explain/explain.go @@ -0,0 +1,173 @@ +// Package explain provides EXPLAIN AST output functionality for ClickHouse SQL. +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +// Explain returns the EXPLAIN AST output for a statement, matching ClickHouse's format. +func Explain(stmt ast.Statement) string { + var sb strings.Builder + Node(&sb, stmt, 0) + return sb.String() +} + +// Node writes the EXPLAIN AST output for an AST node. +func Node(sb *strings.Builder, node interface{}, depth int) { + if node == nil { + // nil can represent an empty tuple in function arguments + indent := strings.Repeat(" ", depth) + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + + indent := strings.Repeat(" ", depth) + + switch n := node.(type) { + // Select statements + case *ast.SelectWithUnionQuery: + explainSelectWithUnionQuery(sb, n, indent, depth) + case *ast.SelectQuery: + explainSelectQuery(sb, n, indent, depth) + + // Tables + case *ast.TablesInSelectQuery: + explainTablesInSelectQuery(sb, n, indent, depth) + case *ast.TablesInSelectQueryElement: + explainTablesInSelectQueryElement(sb, n, indent, depth) + case *ast.TableExpression: + explainTableExpression(sb, n, indent, depth) + case *ast.TableIdentifier: + explainTableIdentifier(sb, n, indent) + case *ast.ArrayJoinClause: + explainArrayJoinClause(sb, n, indent, depth) + case *ast.TableJoin: + explainTableJoin(sb, n, indent, depth) + + // Expressions + case *ast.OrderByElement: + explainOrderByElement(sb, n, indent, depth) + case *ast.Identifier: + explainIdentifier(sb, n, indent) + case *ast.Literal: + explainLiteral(sb, n, indent, depth) + case *ast.BinaryExpr: + explainBinaryExpr(sb, n, indent, depth) + case *ast.UnaryExpr: + explainUnaryExpr(sb, n, indent, 
depth) + case *ast.Subquery: + explainSubquery(sb, n, indent, depth) + case *ast.AliasedExpr: + explainAliasedExpr(sb, n, depth) + case *ast.Asterisk: + explainAsterisk(sb, n, indent) + + // Functions + case *ast.FunctionCall: + explainFunctionCall(sb, n, indent, depth) + case *ast.Lambda: + explainLambda(sb, n, indent, depth) + case *ast.CastExpr: + explainCastExpr(sb, n, indent, depth) + case *ast.InExpr: + explainInExpr(sb, n, indent, depth) + case *ast.TernaryExpr: + explainTernaryExpr(sb, n, indent, depth) + case *ast.ArrayAccess: + explainArrayAccess(sb, n, indent, depth) + case *ast.TupleAccess: + explainTupleAccess(sb, n, indent, depth) + case *ast.LikeExpr: + explainLikeExpr(sb, n, indent, depth) + case *ast.BetweenExpr: + explainBetweenExpr(sb, n, indent, depth) + case *ast.IsNullExpr: + explainIsNullExpr(sb, n, indent, depth) + case *ast.CaseExpr: + explainCaseExpr(sb, n, indent, depth) + case *ast.IntervalExpr: + explainIntervalExpr(sb, n, indent, depth) + case *ast.ExistsExpr: + explainExistsExpr(sb, n, indent, depth) + case *ast.ExtractExpr: + explainExtractExpr(sb, n, indent, depth) + + // DDL statements + case *ast.CreateQuery: + explainCreateQuery(sb, n, indent, depth) + case *ast.DropQuery: + explainDropQuery(sb, n, indent) + case *ast.SetQuery: + explainSetQuery(sb, indent) + case *ast.SystemQuery: + explainSystemQuery(sb, indent) + case *ast.ExplainQuery: + explainExplainQuery(sb, n, indent, depth) + case *ast.ShowQuery: + explainShowQuery(sb, n, indent) + case *ast.UseQuery: + explainUseQuery(sb, n, indent) + case *ast.DescribeQuery: + explainDescribeQuery(sb, n, indent) + + // Types + case *ast.DataType: + explainDataType(sb, n, indent, depth) + case *ast.Parameter: + explainParameter(sb, n, indent) + + default: + // For unhandled types, just print the type name + fmt.Fprintf(sb, "%s%T\n", indent, node) + } +} + +// TablesWithArrayJoin handles FROM and ARRAY JOIN together as TablesInSelectQuery +func TablesWithArrayJoin(sb *strings.Builder, 
from *ast.TablesInSelectQuery, arrayJoin *ast.ArrayJoinClause, depth int) { + indent := strings.Repeat(" ", depth) + + tableCount := 0 + if from != nil { + tableCount = len(from.Tables) + } + if arrayJoin != nil { + tableCount++ + } + + fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, tableCount) + + if from != nil { + for _, t := range from.Tables { + Node(sb, t, depth+1) + } + } + + if arrayJoin != nil { + // ARRAY JOIN is wrapped in TablesInSelectQueryElement + fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", indent, 1) + Node(sb, arrayJoin, depth+2) + } +} + +// Column handles column declarations +func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { + indent := strings.Repeat(" ", depth) + children := 0 + if col.Type != nil { + children++ + } + if col.Default != nil { + children++ + } + fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children) + if col.Type != nil { + Node(sb, col.Type, depth+1) + } + if col.Default != nil { + Node(sb, col.Default, depth+1) + } +} diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go new file mode 100644 index 000000000..1ef099373 --- /dev/null +++ b/internal/explain/expressions.go @@ -0,0 +1,151 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainIdentifier(sb *strings.Builder, n *ast.Identifier, indent string) { + name := n.Name() + if n.Alias != "" { + fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, name, n.Alias) + } else { + fmt.Fprintf(sb, "%sIdentifier %s\n", indent, name) + } +} + +func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth int) { + // Check if this is a tuple - either with expressions or empty + if n.Type == ast.LiteralTuple { + if exprs, ok := n.Value.([]ast.Expression); ok { + // Check if empty tuple or has complex expressions + if len(exprs) == 0 { + // Empty tuple renders as Function tuple with empty 
ExpressionList + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + hasComplexExpr := false + for _, e := range exprs { + if _, isLit := e.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function tuple instead of Literal + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + Node(sb, e, depth+2) + } + return + } + } else if n.Value == nil { + // nil value means empty tuple + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + } + // Check if this is an array with complex expressions or empty that should be rendered as Function array + if n.Type == ast.LiteralArray { + if exprs, ok := n.Value.([]ast.Expression); ok { + // Empty array renders as Function array with empty ExpressionList + if len(exprs) == 0 { + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + hasComplexExpr := false + for _, e := range exprs { + if _, isLit := e.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function array instead of Literal + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + Node(sb, e, depth+2) + } + return + } + } else if n.Value == nil { + // nil value means empty array + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + } + fmt.Fprintf(sb, "%sLiteral %s\n", indent, FormatLiteral(n)) +} + +func explainBinaryExpr(sb *strings.Builder, n *ast.BinaryExpr, indent string, depth int) { + // Convert operator to function name + fnName := 
OperatorToFunction(n.Op) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Left, depth+2) + Node(sb, n.Right, depth+2) +} + +func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, depth int) { + fnName := UnaryOperatorToFunction(n.Op) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.Operand, depth+2) +} + +func explainSubquery(sb *strings.Builder, n *ast.Subquery, indent string, depth int) { + children := 1 + fmt.Fprintf(sb, "%sSubquery (children %d)\n", indent, children) + Node(sb, n.Query, depth+1) +} + +func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { + // For aliased expressions, we need to show the underlying expression with the alias + indent := strings.Repeat(" ", depth) + + switch e := n.Expr.(type) { + case *ast.Literal: + // Check if this is a tuple with complex expressions that should be rendered as Function tuple + if e.Type == ast.LiteralTuple { + if exprs, ok := e.Value.([]ast.Expression); ok { + hasComplexExpr := false + for _, expr := range exprs { + if _, isLit := expr.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function tuple with alias + fmt.Fprintf(sb, "%sFunction tuple (alias %s) (children %d)\n", indent, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, expr := range exprs { + Node(sb, expr, depth+2) + } + return + } + } + } + fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, FormatLiteral(e), n.Alias) + default: + // For other types, recursively explain and add alias info + Node(sb, n.Expr, depth) + } +} + +func explainAsterisk(sb *strings.Builder, n *ast.Asterisk, indent string) { + if n.Table != "" { + fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s 
Identifier %s\n", indent, n.Table) + } else { + fmt.Fprintf(sb, "%sAsterisk\n", indent) + } +} diff --git a/internal/explain/format.go b/internal/explain/format.go new file mode 100644 index 000000000..6a0fed621 --- /dev/null +++ b/internal/explain/format.go @@ -0,0 +1,212 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +// FormatLiteral formats a literal value for EXPLAIN AST output +func FormatLiteral(lit *ast.Literal) string { + switch lit.Type { + case ast.LiteralInteger: + val := lit.Value.(int64) + if val >= 0 { + return fmt.Sprintf("UInt64_%d", val) + } + return fmt.Sprintf("Int64_%d", val) + case ast.LiteralFloat: + val := lit.Value.(float64) + return fmt.Sprintf("Float64_%v", val) + case ast.LiteralString: + s := lit.Value.(string) + // Escape backslashes in strings + s = strings.ReplaceAll(s, "\\", "\\\\") + return fmt.Sprintf("\\'%s\\'", s) + case ast.LiteralBoolean: + if lit.Value.(bool) { + return "UInt8_1" + } + return "UInt8_0" + case ast.LiteralNull: + return "NULL" + case ast.LiteralArray: + return formatArrayLiteral(lit.Value) + case ast.LiteralTuple: + return formatTupleLiteral(lit.Value) + default: + return fmt.Sprintf("%v", lit.Value) + } +} + +// formatArrayLiteral formats an array literal for EXPLAIN AST output +func formatArrayLiteral(val interface{}) string { + exprs, ok := val.([]ast.Expression) + if !ok { + return "Array_[]" + } + var parts []string + for _, e := range exprs { + if lit, ok := e.(*ast.Literal); ok { + parts = append(parts, FormatLiteral(lit)) + } else if ident, ok := e.(*ast.Identifier); ok { + parts = append(parts, ident.Name()) + } else { + parts = append(parts, fmt.Sprintf("%v", e)) + } + } + return fmt.Sprintf("Array_[%s]", strings.Join(parts, ", ")) +} + +// formatTupleLiteral formats a tuple literal for EXPLAIN AST output +func formatTupleLiteral(val interface{}) string { + exprs, ok := val.([]ast.Expression) + if !ok { + return "Tuple_()" + } + var parts []string 
// NormalizeFunctionName maps a function name to the spelling used in this
// package's EXPLAIN AST output.
//
// The lookup is case-insensitive. Known aliases are replaced by their target
// name (for example "substr" becomes "substring"); a handful of names map to
// their own lower-case form; every other name is returned unchanged, with its
// original casing.
//
// The table is a switch rather than a map literal so that no map is
// allocated on each call.
func NormalizeFunctionName(name string) string {
	lower := strings.ToLower(name)
	switch lower {
	case "ltrim":
		return "trimLeft"
	case "rtrim":
		return "trimRight"
	case "lcase":
		return "lower"
	case "ucase":
		return "upper"
	case "mid", "substr":
		return "substring"
	case "pow":
		return "power"
	case "ceil":
		return "ceiling"
	case "ln":
		return "log"
	case "ifnull":
		return "ifNull"
	case "nullif":
		return "nullIf"
	case "concat_ws":
		return "concat"
	case "length", "char_length":
		return "length"
	case "log10", "log2", "rand", "coalesce", "greatest", "least":
		// These map to themselves, which only lower-cases the input.
		return lower
	}
	return name
}
// UnaryOperatorToFunction returns the function name used in EXPLAIN AST
// output for a unary operator: "-" is "negate" and "NOT" is "not". Any other
// operator is returned lower-cased.
func UnaryOperatorToFunction(op string) string {
	if op == "-" {
		return "negate"
	}
	if op == "NOT" {
		return "not"
	}
	return strings.ToLower(op)
}
n.Alias != "" { + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, children) + } else { + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, children) + } + // Arguments + fmt.Fprintf(sb, "%s ExpressionList", indent) + if len(n.Arguments) > 0 { + fmt.Fprintf(sb, " (children %d)", len(n.Arguments)) + } + fmt.Fprintln(sb) + for _, arg := range n.Arguments { + Node(sb, arg, depth+2) + } + // Parameters (for parametric functions) + if len(n.Parameters) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) + for _, p := range n.Parameters { + Node(sb, p, depth+2) + } + } +} + +func explainLambda(sb *strings.Builder, n *ast.Lambda, indent string, depth int) { + // Lambda is represented as Function lambda with tuple of params and body + fmt.Fprintf(sb, "%sFunction lambda (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + // Parameters as tuple + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) + for _, p := range n.Parameters { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, p) + } + // Body + Node(sb, n.Body, depth+2) +} + +func explainCastExpr(sb *strings.Builder, n *ast.CastExpr, indent string, depth int) { + // CAST is represented as Function CAST with expr and type as arguments + fmt.Fprintf(sb, "%sFunction CAST (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + // For :: operator syntax, expression is represented as string literal + if n.OperatorSyntax { + // Format expression as string literal + exprStr := formatExprAsString(n.Expr) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, exprStr) + } else { + Node(sb, n.Expr, depth+2) + } + // Type is formatted as a literal string + typeStr := FormatDataType(n.Type) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, typeStr) +} + +func explainInExpr(sb 
*strings.Builder, n *ast.InExpr, indent string, depth int) { + // IN is represented as Function in + fnName := "in" + if n.Not { + fnName = "notIn" + } + if n.Global { + fnName = "global" + strings.Title(fnName) + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + // Count arguments: expr + list items or subquery + argCount := 1 + if n.Query != nil { + argCount++ + } else { + argCount += len(n.List) + } + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + Node(sb, n.Expr, depth+2) + if n.Query != nil { + // Subqueries in IN should be wrapped in Subquery node + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + Node(sb, n.Query, depth+3) + } else { + for _, item := range n.List { + Node(sb, item, depth+2) + } + } +} + +func explainTernaryExpr(sb *strings.Builder, n *ast.TernaryExpr, indent string, depth int) { + // Ternary is represented as Function if with 3 arguments + fmt.Fprintf(sb, "%sFunction if (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 3) + Node(sb, n.Condition, depth+2) + Node(sb, n.Then, depth+2) + Node(sb, n.Else, depth+2) +} + +func explainArrayAccess(sb *strings.Builder, n *ast.ArrayAccess, indent string, depth int) { + // Array access is represented as Function arrayElement + fmt.Fprintf(sb, "%sFunction arrayElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Array, depth+2) + Node(sb, n.Index, depth+2) +} + +func explainTupleAccess(sb *strings.Builder, n *ast.TupleAccess, indent string, depth int) { + // Tuple access is represented as Function tupleElement + fmt.Fprintf(sb, "%sFunction tupleElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Tuple, depth+2) + Node(sb, n.Index, depth+2) +} + +func explainLikeExpr(sb *strings.Builder, n *ast.LikeExpr, indent string, depth int) { + // LIKE is represented as Function like + 
fnName := "like" + if n.CaseInsensitive { + fnName = "ilike" + } + if n.Not { + fnName = "not" + strings.Title(fnName) + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+2) + Node(sb, n.Pattern, depth+2) +} + +func explainBetweenExpr(sb *strings.Builder, n *ast.BetweenExpr, indent string, depth int) { + // BETWEEN is represented as Function and with two comparisons + // But for explain, we can use a simpler form + fnName := "between" + if n.Not { + fnName = "notBetween" + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 3) + Node(sb, n.Expr, depth+2) + Node(sb, n.Low, depth+2) + Node(sb, n.High, depth+2) +} + +func explainIsNullExpr(sb *strings.Builder, n *ast.IsNullExpr, indent string, depth int) { + // IS NULL is represented as Function isNull + fnName := "isNull" + if n.Not { + fnName = "isNotNull" + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.Expr, depth+2) +} + +func explainCaseExpr(sb *strings.Builder, n *ast.CaseExpr, indent string, depth int) { + // CASE is represented as Function multiIf or caseWithExpression + if n.Operand != nil { + // CASE x WHEN ... form + argCount := 1 + len(n.Whens)*2 // operand + (condition, result) pairs + if n.Else != nil { + argCount++ + } + fmt.Fprintf(sb, "%sFunction caseWithExpression (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + Node(sb, n.Operand, depth+2) + for _, w := range n.Whens { + Node(sb, w.Condition, depth+2) + Node(sb, w.Result, depth+2) + } + if n.Else != nil { + Node(sb, n.Else, depth+2) + } + } else { + // CASE WHEN ... 
form + argCount := len(n.Whens) * 2 + if n.Else != nil { + argCount++ + } + fmt.Fprintf(sb, "%sFunction multiIf (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + for _, w := range n.Whens { + Node(sb, w.Condition, depth+2) + Node(sb, w.Result, depth+2) + } + if n.Else != nil { + Node(sb, n.Else, depth+2) + } + } +} + +func explainIntervalExpr(sb *strings.Builder, n *ast.IntervalExpr, indent string, depth int) { + // INTERVAL is represented as Function toInterval + fnName := "toInterval" + n.Unit + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.Value, depth+2) +} + +func explainExistsExpr(sb *strings.Builder, n *ast.ExistsExpr, indent string, depth int) { + // EXISTS is represented as Function exists + fmt.Fprintf(sb, "%sFunction exists (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + Node(sb, n.Query, depth+3) +} + +func explainExtractExpr(sb *strings.Builder, n *ast.ExtractExpr, indent string, depth int) { + // EXTRACT is represented as Function toYear, toMonth, etc. 
+ fnName := "to" + strings.Title(strings.ToLower(n.Field)) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.From, depth+2) +} diff --git a/internal/explain/select.go b/internal/explain/select.go new file mode 100644 index 000000000..52e15aa07 --- /dev/null +++ b/internal/explain/select.go @@ -0,0 +1,127 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuery, indent string, depth int) { + children := countSelectUnionChildren(n) + fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children) + // Wrap selects in ExpressionList + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects)) + for _, sel := range n.Selects { + Node(sb, sel, depth+2) + } + // FORMAT clause - check if any SelectQuery has Format set + for _, sel := range n.Selects { + if sq, ok := sel.(*ast.SelectQuery); ok && sq.Format != nil { + Node(sb, sq.Format, depth+1) + break + } + } +} + +func explainSelectQuery(sb *strings.Builder, n *ast.SelectQuery, indent string, depth int) { + children := countSelectQueryChildren(n) + fmt.Fprintf(sb, "%sSelectQuery (children %d)\n", indent, children) + // Columns (ExpressionList) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) + for _, col := range n.Columns { + Node(sb, col, depth+2) + } + // FROM (including ARRAY JOIN as part of TablesInSelectQuery) + if n.From != nil || n.ArrayJoin != nil { + TablesWithArrayJoin(sb, n.From, n.ArrayJoin, depth+1) + } + // PREWHERE + if n.PreWhere != nil { + Node(sb, n.PreWhere, depth+1) + } + // WHERE + if n.Where != nil { + Node(sb, n.Where, depth+1) + } + // GROUP BY + if len(n.GroupBy) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.GroupBy)) + for _, g := range n.GroupBy { + Node(sb, g, depth+2) + } + } + // 
HAVING + if n.Having != nil { + Node(sb, n.Having, depth+1) + } + // ORDER BY + if len(n.OrderBy) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) + for _, o := range n.OrderBy { + Node(sb, o, depth+2) + } + } + // LIMIT + if n.Limit != nil { + Node(sb, n.Limit, depth+1) + } + // OFFSET + if n.Offset != nil { + Node(sb, n.Offset, depth+1) + } + // SETTINGS + if len(n.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + } +} + +func explainOrderByElement(sb *strings.Builder, n *ast.OrderByElement, indent string, depth int) { + fmt.Fprintf(sb, "%sOrderByElement (children %d)\n", indent, 1) + Node(sb, n.Expression, depth+1) +} + +func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int { + count := 1 // ExpressionList of selects + // Check if any SelectQuery has Format set + for _, sel := range n.Selects { + if sq, ok := sel.(*ast.SelectQuery); ok && sq.Format != nil { + count++ + break + } + } + return count +} + +func countSelectQueryChildren(n *ast.SelectQuery) int { + count := 1 // columns ExpressionList + // FROM and ARRAY JOIN together count as one child (TablesInSelectQuery) + if n.From != nil || n.ArrayJoin != nil { + count++ + } + if n.PreWhere != nil { + count++ + } + if n.Where != nil { + count++ + } + if len(n.GroupBy) > 0 { + count++ + } + if n.Having != nil { + count++ + } + if len(n.OrderBy) > 0 { + count++ + } + if n.Limit != nil { + count++ + } + if n.Offset != nil { + count++ + } + if len(n.Settings) > 0 { + count++ + } + return count +} diff --git a/internal/explain/statements.go b/internal/explain/statements.go new file mode 100644 index 000000000..133ebcfdd --- /dev/null +++ b/internal/explain/statements.go @@ -0,0 +1,173 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, depth int) { + name := n.Table + if n.View != "" { + name = n.View + } + if n.CreateDatabase { + name = 
n.Database + } + // Count children: name + columns + engine/storage + children := 1 // name identifier + if len(n.Columns) > 0 { + children++ + } + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + children++ + } + if n.AsSelect != nil { + children++ + } + fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) + if len(n.Columns) > 0 { + fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) + for _, col := range n.Columns { + Column(sb, col, depth+3) + } + } + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || len(n.Settings) > 0 { + storageChildren := 0 + if n.Engine != nil { + storageChildren++ + } + if len(n.OrderBy) > 0 { + storageChildren++ + } + if len(n.PrimaryKey) > 0 { + storageChildren++ + } + if len(n.Settings) > 0 { + storageChildren++ + } + fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) + if n.Engine != nil { + if n.Engine.HasParentheses { + fmt.Fprintf(sb, "%s Function %s (children %d)\n", indent, n.Engine.Name, 1) + if len(n.Engine.Parameters) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Engine.Parameters)) + for _, param := range n.Engine.Parameters { + Node(sb, param, depth+4) + } + } else { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } + } else { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + } + } + if len(n.OrderBy) > 0 { + if len(n.OrderBy) == 1 { + if ident, ok := n.OrderBy[0].(*ast.Identifier); ok { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else { + Node(sb, n.OrderBy[0], depth+2) + } + } else { + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) + for _, o := range n.OrderBy { + Node(sb, o, depth+4) + } + } + } + if len(n.Settings) > 0 { 
+ fmt.Fprintf(sb, "%s Set\n", indent) + } + } + if n.AsSelect != nil { + // AS SELECT is output directly without Subquery wrapper + Node(sb, n.AsSelect, depth+1) + } +} + +func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string) { + // DROP USER has a special output format + if n.User != "" { + fmt.Fprintf(sb, "%sDROP USER query\n", indent) + return + } + name := n.Table + if n.View != "" { + name = n.View + } + if n.DropDatabase { + name = n.Database + } + fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) +} + +func explainSetQuery(sb *strings.Builder, indent string) { + fmt.Fprintf(sb, "%sSet\n", indent) +} + +func explainSystemQuery(sb *strings.Builder, indent string) { + fmt.Fprintf(sb, "%sSYSTEM query\n", indent) +} + +func explainExplainQuery(sb *strings.Builder, n *ast.ExplainQuery, indent string, depth int) { + fmt.Fprintf(sb, "%sExplain %s (children %d)\n", indent, n.ExplainType, 1) + Node(sb, n.Statement, depth+1) +} + +func explainShowQuery(sb *strings.Builder, n *ast.ShowQuery, indent string) { + // Capitalize ShowType correctly for display + showType := strings.Title(strings.ToLower(string(n.ShowType))) + fmt.Fprintf(sb, "%sShow%s\n", indent, showType) +} + +func explainUseQuery(sb *strings.Builder, n *ast.UseQuery, indent string) { + fmt.Fprintf(sb, "%sUse %s\n", indent, n.Database) +} + +func explainDescribeQuery(sb *strings.Builder, n *ast.DescribeQuery, indent string) { + name := n.Table + if n.Database != "" { + name = n.Database + "." 
+ n.Table + } + fmt.Fprintf(sb, "%sDescribe %s\n", indent, name) +} + +func explainDataType(sb *strings.Builder, n *ast.DataType, indent string, depth int) { + // Check if type has complex parameters (expressions, not just literals/types) + hasComplexParams := false + for _, p := range n.Parameters { + if _, ok := p.(*ast.Literal); ok { + continue + } + if _, ok := p.(*ast.DataType); ok { + continue + } + hasComplexParams = true + break + } + + if hasComplexParams && len(n.Parameters) > 0 { + // Complex parameters need to be output as children + fmt.Fprintf(sb, "%sDataType %s (children %d)\n", indent, n.Name, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) + for _, p := range n.Parameters { + Node(sb, p, depth+2) + } + } else { + fmt.Fprintf(sb, "%sDataType %s\n", indent, FormatDataType(n)) + } +} + +func explainParameter(sb *strings.Builder, n *ast.Parameter, indent string) { + if n.Name != "" { + fmt.Fprintf(sb, "%sQueryParameter %s\n", indent, n.Name) + } else { + fmt.Fprintf(sb, "%sQueryParameter\n", indent) + } +} diff --git a/internal/explain/tables.go b/internal/explain/tables.go new file mode 100644 index 000000000..e70712016 --- /dev/null +++ b/internal/explain/tables.go @@ -0,0 +1,89 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainTablesInSelectQuery(sb *strings.Builder, n *ast.TablesInSelectQuery, indent string, depth int) { + fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, len(n.Tables)) + for _, t := range n.Tables { + Node(sb, t, depth+1) + } +} + +func explainTablesInSelectQueryElement(sb *strings.Builder, n *ast.TablesInSelectQueryElement, indent string, depth int) { + children := 1 // table + if n.Join != nil { + children++ + } + fmt.Fprintf(sb, "%sTablesInSelectQueryElement (children %d)\n", indent, children) + if n.Table != nil { + Node(sb, n.Table, depth+1) + } + if n.Join != nil { + Node(sb, n.Join, depth+1) + } +} + +func 
explainTableExpression(sb *strings.Builder, n *ast.TableExpression, indent string, depth int) { + children := 1 // table + fmt.Fprintf(sb, "%sTableExpression (children %d)\n", indent, children) + // If there's a subquery with an alias, pass the alias to the subquery output + if subq, ok := n.Table.(*ast.Subquery); ok && n.Alias != "" { + fmt.Fprintf(sb, "%s Subquery (alias %s) (children %d)\n", indent, n.Alias, 1) + Node(sb, subq.Query, depth+2) + } else { + Node(sb, n.Table, depth+1) + } +} + +func explainTableIdentifier(sb *strings.Builder, n *ast.TableIdentifier, indent string) { + name := n.Table + if n.Database != "" { + name = n.Database + "." + n.Table + } + fmt.Fprintf(sb, "%sTableIdentifier %s\n", indent, name) +} + +func explainArrayJoinClause(sb *strings.Builder, n *ast.ArrayJoinClause, indent string, depth int) { + fmt.Fprintf(sb, "%sArrayJoin (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList", indent) + if len(n.Columns) > 0 { + fmt.Fprintf(sb, " (children %d)", len(n.Columns)) + } + fmt.Fprintln(sb) + for _, col := range n.Columns { + Node(sb, col, depth+2) + } +} + +func explainTableJoin(sb *strings.Builder, n *ast.TableJoin, indent string, depth int) { + // TableJoin is part of TablesInSelectQueryElement + joinType := strings.ToLower(string(n.Type)) + if n.Strictness != "" { + joinType = strings.ToLower(string(n.Strictness)) + " " + joinType + } + if n.Global { + joinType = "global " + joinType + } + children := 0 + if n.On != nil { + children++ + } + if len(n.Using) > 0 { + children++ + } + fmt.Fprintf(sb, "%sTableJoin %s (children %d)\n", indent, joinType, children) + if n.On != nil { + Node(sb, n.On, depth+1) + } + if len(n.Using) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Using)) + for _, u := range n.Using { + Node(sb, u, depth+2) + } + } +} diff --git a/parser/explain.go b/parser/explain.go index 485513e69..906efb8a6 100644 --- a/parser/explain.go +++ b/parser/explain.go @@ -1,384 +1,11 @@ 
package parser import ( - "fmt" - "strings" - "github.com/kyleconroy/doubleclick/ast" + "github.com/kyleconroy/doubleclick/internal/explain" ) // Explain returns the EXPLAIN AST output for a statement, matching ClickHouse's format. func Explain(stmt ast.Statement) string { - var sb strings.Builder - explainNode(&sb, stmt, 0) - return sb.String() -} - -// explainNode writes the EXPLAIN AST output for an AST node. -func explainNode(sb *strings.Builder, node interface{}, depth int) { - if node == nil { - return - } - - indent := strings.Repeat(" ", depth) - - switch n := node.(type) { - case *ast.SelectWithUnionQuery: - children := countChildren(n) - fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children) - // Wrap selects in ExpressionList - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects)) - for _, sel := range n.Selects { - explainNode(sb, sel, depth+2) - } - - case *ast.SelectQuery: - children := countSelectQueryChildren(n) - fmt.Fprintf(sb, "%sSelectQuery (children %d)\n", indent, children) - // Columns (ExpressionList) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) - for _, col := range n.Columns { - explainNode(sb, col, depth+2) - } - // FROM - if n.From != nil { - explainNode(sb, n.From, depth+1) - } - // ARRAY JOIN - if n.ArrayJoin != nil { - explainNode(sb, n.ArrayJoin, depth+1) - } - // PREWHERE - if n.PreWhere != nil { - explainNode(sb, n.PreWhere, depth+1) - } - // WHERE - if n.Where != nil { - explainNode(sb, n.Where, depth+1) - } - // GROUP BY - if len(n.GroupBy) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.GroupBy)) - for _, g := range n.GroupBy { - explainNode(sb, g, depth+2) - } - } - // HAVING - if n.Having != nil { - explainNode(sb, n.Having, depth+1) - } - // ORDER BY - if len(n.OrderBy) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) - for _, o := range n.OrderBy { - explainNode(sb, o, depth+2) - } - } 
- // LIMIT - if n.Limit != nil { - explainNode(sb, n.Limit, depth+1) - } - // OFFSET - if n.Offset != nil { - explainNode(sb, n.Offset, depth+1) - } - - case *ast.TablesInSelectQuery: - fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, len(n.Tables)) - for _, t := range n.Tables { - explainNode(sb, t, depth+1) - } - - case *ast.TablesInSelectQueryElement: - children := 1 // table - if n.Join != nil { - children++ - } - fmt.Fprintf(sb, "%sTablesInSelectQueryElement (children %d)\n", indent, children) - if n.Table != nil { - explainNode(sb, n.Table, depth+1) - } - if n.Join != nil { - explainNode(sb, n.Join, depth+1) - } - - case *ast.TableExpression: - children := 1 // table - if n.Alias != "" { - children++ - } - fmt.Fprintf(sb, "%sTableExpression (children %d)\n", indent, children) - explainNode(sb, n.Table, depth+1) - - case *ast.TableIdentifier: - name := n.Table - if n.Database != "" { - name = n.Database + "." + n.Table - } - fmt.Fprintf(sb, "%sTableIdentifier %s\n", indent, name) - - case *ast.ArrayJoinClause: - fmt.Fprintf(sb, "%sArrayJoin (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList", indent) - if len(n.Columns) > 0 { - fmt.Fprintf(sb, " (children %d)", len(n.Columns)) - } - fmt.Fprintln(sb) - for _, col := range n.Columns { - explainNode(sb, col, depth+2) - } - - case *ast.OrderByElement: - fmt.Fprintf(sb, "%sOrderByElement (children %d)\n", indent, 1) - explainNode(sb, n.Expression, depth+1) - - case *ast.Identifier: - name := n.Name() - if n.Alias != "" { - fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, name, n.Alias) - } else { - fmt.Fprintf(sb, "%sIdentifier %s\n", indent, name) - } - - case *ast.Literal: - fmt.Fprintf(sb, "%sLiteral %s\n", indent, formatLiteral(n)) - - case *ast.FunctionCall: - children := 1 // arguments ExpressionList - if len(n.Parameters) > 0 { - children++ // parameters ExpressionList - } - if n.Alias != "" { - fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, n.Name, 
n.Alias, children) - } else { - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, n.Name, children) - } - // Arguments - fmt.Fprintf(sb, "%s ExpressionList", indent) - if len(n.Arguments) > 0 { - fmt.Fprintf(sb, " (children %d)", len(n.Arguments)) - } - fmt.Fprintln(sb) - for _, arg := range n.Arguments { - explainNode(sb, arg, depth+2) - } - // Parameters (for parametric functions) - if len(n.Parameters) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) - for _, p := range n.Parameters { - explainNode(sb, p, depth+2) - } - } - - case *ast.BinaryExpr: - // Convert operator to function name - fnName := operatorToFunction(n.Op) - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - explainNode(sb, n.Left, depth+2) - explainNode(sb, n.Right, depth+2) - - case *ast.UnaryExpr: - fnName := unaryOperatorToFunction(n.Op) - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - explainNode(sb, n.Operand, depth+2) - - case *ast.Subquery: - children := 1 - fmt.Fprintf(sb, "%sSubquery (children %d)\n", indent, children) - explainNode(sb, n.Query, depth+1) - - case *ast.AliasedExpr: - explainAliasedExpr(sb, n, depth) - - case *ast.Lambda: - // Lambda is represented as Function lambda with tuple of params and body - fmt.Fprintf(sb, "%sFunction lambda (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - // Parameters as tuple - fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) - for _, p := range n.Parameters { - fmt.Fprintf(sb, "%s Identifier %s\n", indent, p) - } - // Body - explainNode(sb, n.Body, depth+2) - - case *ast.SetQuery: - fmt.Fprintf(sb, "%sSet\n", indent) - - default: - // For unhandled types, just print the type name - 
fmt.Fprintf(sb, "%s%T\n", indent, node) - } -} - -// countChildren counts the children of a SelectWithUnionQuery -func countChildren(n *ast.SelectWithUnionQuery) int { - return 1 // ExpressionList of selects -} - -// countSelectQueryChildren counts the children of a SelectQuery -func countSelectQueryChildren(n *ast.SelectQuery) int { - count := 1 // columns ExpressionList - if n.From != nil { - count++ - } - if n.ArrayJoin != nil { - count++ - } - if n.PreWhere != nil { - count++ - } - if n.Where != nil { - count++ - } - if len(n.GroupBy) > 0 { - count++ - } - if n.Having != nil { - count++ - } - if len(n.OrderBy) > 0 { - count++ - } - if n.Limit != nil { - count++ - } - if n.Offset != nil { - count++ - } - return count -} - -// formatLiteral formats a literal value for EXPLAIN AST output -func formatLiteral(lit *ast.Literal) string { - switch lit.Type { - case ast.LiteralInteger: - val := lit.Value.(int64) - if val >= 0 { - return fmt.Sprintf("UInt64_%d", val) - } - return fmt.Sprintf("Int64_%d", val) - case ast.LiteralFloat: - val := lit.Value.(float64) - return fmt.Sprintf("Float64_%v", val) - case ast.LiteralString: - s := lit.Value.(string) - return fmt.Sprintf("\\'%s\\'", s) - case ast.LiteralBoolean: - if lit.Value.(bool) { - return "UInt8_1" - } - return "UInt8_0" - case ast.LiteralNull: - return "Null" - case ast.LiteralArray: - return formatArrayLiteral(lit.Value) - case ast.LiteralTuple: - return formatTupleLiteral(lit.Value) - default: - return fmt.Sprintf("%v", lit.Value) - } -} - -// formatArrayLiteral formats an array literal for EXPLAIN AST output -func formatArrayLiteral(val interface{}) string { - exprs, ok := val.([]ast.Expression) - if !ok { - return "Array_[]" - } - var parts []string - for _, e := range exprs { - if lit, ok := e.(*ast.Literal); ok { - parts = append(parts, formatLiteral(lit)) - } else if ident, ok := e.(*ast.Identifier); ok { - parts = append(parts, ident.Name()) - } else { - parts = append(parts, fmt.Sprintf("%v", e)) - } - } 
- return fmt.Sprintf("Array_[%s]", strings.Join(parts, ", ")) -} - -// formatTupleLiteral formats a tuple literal for EXPLAIN AST output -func formatTupleLiteral(val interface{}) string { - exprs, ok := val.([]ast.Expression) - if !ok { - return "Tuple_()" - } - var parts []string - for _, e := range exprs { - if lit, ok := e.(*ast.Literal); ok { - parts = append(parts, formatLiteral(lit)) - } else if ident, ok := e.(*ast.Identifier); ok { - parts = append(parts, ident.Name()) - } else { - parts = append(parts, fmt.Sprintf("%v", e)) - } - } - return fmt.Sprintf("Tuple_(%s)", strings.Join(parts, ", ")) -} - -// operatorToFunction maps binary operators to ClickHouse function names -func operatorToFunction(op string) string { - switch op { - case "+": - return "plus" - case "-": - return "minus" - case "*": - return "multiply" - case "/": - return "divide" - case "%": - return "modulo" - case "=", "==": - return "equals" - case "!=", "<>": - return "notEquals" - case "<": - return "less" - case ">": - return "greater" - case "<=": - return "lessOrEquals" - case ">=": - return "greaterOrEquals" - case "AND": - return "and" - case "OR": - return "or" - case "||": - return "concat" - default: - return strings.ToLower(op) - } -} - -// unaryOperatorToFunction maps unary operators to ClickHouse function names -func unaryOperatorToFunction(op string) string { - switch op { - case "-": - return "negate" - case "NOT": - return "not" - default: - return strings.ToLower(op) - } -} - -// explainAliasedExpr handles expressions with aliases -func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { - // For aliased expressions, we need to show the underlying expression with the alias - indent := strings.Repeat(" ", depth) - - switch e := n.Expr.(type) { - case *ast.Literal: - fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, formatLiteral(e), n.Alias) - default: - // For other types, recursively explain and add alias info - explainNode(sb, n.Expr, depth) - } 
+ return explain.Explain(stmt) } diff --git a/parser/parser.go b/parser/parser.go index 809b0f7fc..36af97046 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1318,6 +1318,7 @@ func (p *Parser) parseDrop() *ast.DropQuery { } // What are we dropping? + dropUser := false switch p.current.Token { case token.TABLE: p.nextToken() @@ -1326,6 +1327,9 @@ func (p *Parser) parseDrop() *ast.DropQuery { p.nextToken() case token.VIEW: p.nextToken() + case token.USER: + dropUser = true + p.nextToken() default: p.nextToken() // skip unknown token } @@ -1356,7 +1360,9 @@ func (p *Parser) parseDrop() *ast.DropQuery { p.nextToken() } } else { - if drop.DropDatabase { + if dropUser { + drop.User = name + } else if drop.DropDatabase { drop.Database = name } else { drop.Table = name @@ -1794,7 +1800,7 @@ func (p *Parser) parseShow() *ast.ShowQuery { } } default: - // Handle SHOW PROCESSLIST, SHOW DICTIONARIES, etc. + // Handle SHOW PROCESSLIST, SHOW DICTIONARIES, SHOW FUNCTIONS, etc. if p.currentIs(token.IDENT) { upper := strings.ToUpper(p.current.Value) switch upper { @@ -1802,6 +1808,8 @@ func (p *Parser) parseShow() *ast.ShowQuery { show.ShowType = ast.ShowProcesses case "DICTIONARIES": show.ShowType = ast.ShowDictionaries + case "FUNCTIONS": + show.ShowType = ast.ShowFunctions } p.nextToken() } diff --git a/token/token.go b/token/token.go index ec61d19c3..e1b58dc0e 100644 --- a/token/token.go +++ b/token/token.go @@ -177,6 +177,7 @@ const ( UNION UPDATE USE + USER USING VALUES VIEW @@ -355,6 +356,7 @@ var tokens = [...]string{ UNION: "UNION", UPDATE: "UPDATE", USE: "USE", + USER: "USER", USING: "USING", VALUES: "VALUES", VIEW: "VIEW",