diff --git a/TODO.md b/TODO.md index 22295f7daa..c338bd6e76 100644 --- a/TODO.md +++ b/TODO.md @@ -2,8 +2,8 @@ ## Current State -- **Tests passing:** 5,933 (86.9%) -- **Tests skipped:** 891 (13.1%) +- **Tests passing:** 6,006 (88.0%) +- **Tests skipped:** 819 (12.0%) ## Recently Fixed (explain layer) @@ -18,27 +18,15 @@ - ✅ Aliased expression handling for binary/unary/function/identifier - ✅ PARTITION BY support in CREATE TABLE - ✅ Server error message stripping from expected output +- ✅ DROP TABLE with multiple tables (e.g., `DROP TABLE t1, t2, t3`) +- ✅ Negative integer/float literals (e.g., `-1` → `Literal Int64_-1`) +- ✅ Empty tuple in ORDER BY (e.g., `ORDER BY ()` → `Function tuple` with empty `ExpressionList`) +- ✅ String escape handling (lexer now unescapes `\'`, `\\`, `\n`, `\t`, `\0`, etc.) ## Parser Issues (High Priority) These require changes to `parser/parser.go`: -### DROP TABLE with Multiple Tables -Parser only captures first table when multiple are specified: -```sql -DROP TABLE IF EXISTS t1, t2, t3; --- Expected: ExpressionList with 3 TableIdentifiers --- Got: Single Identifier for t1 -``` - -### Negative Integer Literals -Negative numbers are parsed as `Function negate` instead of negative literals: -```sql -SELECT -1, -10000; --- Expected: Literal Int64_-1 --- Got: Function negate (children 1) with Literal UInt64_1 -``` - ### CREATE TABLE with INDEX Clause INDEX definitions in CREATE TABLE are not captured: ```sql @@ -59,22 +47,6 @@ CREATE TABLE t (c Int TTL expr()) ENGINE=MergeTree; -- Expected: ColumnDeclaration with 2 children (type + TTL function) ``` -### Empty Tuple in ORDER BY -`ORDER BY ()` should capture empty tuple expression: -```sql -CREATE TABLE t (...) ENGINE=MergeTree ORDER BY (); --- Expected: Function tuple (children 1) with empty ExpressionList --- Got: Storage definition with no ORDER BY -``` - -### String Escape Handling -Parser stores escaped characters literally instead of unescaping: -```sql -SELECT 'x\'e2\''; --- Parser stores: x\'e2\' (with backslashes) --- Should store: x'e2' (unescaped) -``` - ## Parser Issues (Medium Priority) ### CREATE DICTIONARY @@ -163,11 +135,11 @@ SELECT 2.2250738585072014e-308; ``` ### Array Literals with Negative Numbers -Arrays with negative integers expand to Function instead of Literal: +Arrays with negative integers may still expand to Function instead of Literal in some cases: ```sql SELECT [-10000, 5750]; --- Expected: Literal Array_[Int64_-10000, UInt64_5750] --- Got: Function array with Function negate for -10000 +-- Some cases now work correctly with Literal Int64_-10000 +-- Complex nested arrays may still require additional work ``` ### WithElement for CTE Subqueries diff --git a/ast/ast.go b/ast/ast.go index 8484583db3..0f8898baeb 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -328,16 +328,17 @@ func (t *TTLClause) End() token.Position { return t.Position } // DropQuery represents a DROP statement. type DropQuery struct { - Position token.Position `json:"-"` - IfExists bool `json:"if_exists,omitempty"` - Database string `json:"database,omitempty"` - Table string `json:"table,omitempty"` - View string `json:"view,omitempty"` - User string `json:"user,omitempty"` - Temporary bool `json:"temporary,omitempty"` - OnCluster string `json:"on_cluster,omitempty"` - DropDatabase bool `json:"drop_database,omitempty"` - Sync bool `json:"sync,omitempty"` + Position token.Position `json:"-"` + IfExists bool `json:"if_exists,omitempty"` + Database string `json:"database,omitempty"` + Table string `json:"table,omitempty"` + Tables []*TableIdentifier `json:"tables,omitempty"` // For DROP TABLE t1, t2, t3 + View string `json:"view,omitempty"` + User string `json:"user,omitempty"` + Temporary bool `json:"temporary,omitempty"` + OnCluster string `json:"on_cluster,omitempty"` + DropDatabase bool `json:"drop_database,omitempty"` + Sync bool `json:"sync,omitempty"` } func (d *DropQuery) Pos() token.Position { return d.Position } diff --git a/internal/explain/explain.go b/internal/explain/explain.go index b8598eaa65..1432ad8d96 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -104,7 +104,7 @@ func Node(sb *strings.Builder, node interface{}, depth int) { case *ast.CreateQuery: explainCreateQuery(sb, n, indent, depth) case *ast.DropQuery: - explainDropQuery(sb, n, indent) + explainDropQuery(sb, n, indent, depth) case *ast.SetQuery: explainSetQuery(sb, indent) case *ast.SystemQuery: diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index c47824d5be..c73f4fc98a 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -132,6 +132,29 @@ func collectConcatOperands(n *ast.BinaryExpr) []ast.Expression { } func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, depth int) { + // Handle negate of literal numbers - output as negative literal instead of function + if n.Op == "-" { + if lit, ok := n.Operand.(*ast.Literal); ok { + switch lit.Type { + case ast.LiteralInteger: + // Convert positive integer to negative + switch val := lit.Value.(type) { + case int64: + fmt.Fprintf(sb, "%sLiteral Int64_%d\n", indent, -val) + return + case uint64: + fmt.Fprintf(sb, "%sLiteral Int64_-%d\n", indent, val) + return + } + case ast.LiteralFloat: + val := lit.Value.(float64) + s := FormatFloat(-val) + fmt.Fprintf(sb, "%sLiteral Float64_%s\n", indent, s) + return + } + } + } + fnName := UnaryOperatorToFunction(n.Op) fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) diff --git a/internal/explain/format.go b/internal/explain/format.go index fac17a2c4c..f3e2241a0d 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -8,6 +8,41 @@ import ( "github.com/kyleconroy/doubleclick/ast" ) +// FormatFloat formats a float value for EXPLAIN AST output +func FormatFloat(val float64) string { + // Use 'f' format to avoid scientific notation, -1 precision for smallest representation + return strconv.FormatFloat(val, 'f', -1, 64) +} + +// escapeStringLiteral escapes special characters in a string for EXPLAIN AST output +// Uses double-escaping as ClickHouse EXPLAIN AST displays strings +func escapeStringLiteral(s string) string { + var sb strings.Builder + for _, r := range s { + switch r { + case '\\': + sb.WriteString("\\\\\\\\") // backslash becomes four backslashes (\\\\) + case '\'': + sb.WriteString("\\'") + case '\n': + sb.WriteString("\\\\n") // newline becomes \\n + case '\t': + sb.WriteString("\\\\t") // tab becomes \\t + case '\r': + sb.WriteString("\\\\r") // carriage return becomes \\r + case '\x00': + sb.WriteString("\\\\0") // null becomes \\0 + case '\b': + sb.WriteString("\\\\b") // backspace becomes \\b + case '\f': + sb.WriteString("\\\\f") // form feed becomes \\f + default: + sb.WriteRune(r) + } + } + return sb.String() +} + // FormatLiteral formats a literal value for EXPLAIN AST output func FormatLiteral(lit *ast.Literal) string { switch lit.Type { @@ -26,13 +61,11 @@ func FormatLiteral(lit *ast.Literal) string { } case ast.LiteralFloat: val := lit.Value.(float64) - // Use 'f' format to avoid scientific notation, -1 precision for smallest representation - s := strconv.FormatFloat(val, 'f', -1, 64) - return fmt.Sprintf("Float64_%s", s) + return fmt.Sprintf("Float64_%s", FormatFloat(val)) case ast.LiteralString: s := lit.Value.(string) - // Escape backslashes in strings - s = strings.ReplaceAll(s, "\\", "\\\\") + // Escape special characters for display + s = escapeStringLiteral(s) return fmt.Sprintf("\\'%s\\'", s) case ast.LiteralBoolean: if lit.Value.(bool) { diff --git a/internal/explain/statements.go b/internal/explain/statements.go index f35519d34e..74e60bf74e 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -120,6 +120,18 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, if len(n.OrderBy) == 1 { if ident, ok := n.OrderBy[0].(*ast.Identifier); ok { fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else if lit, ok := n.OrderBy[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple { + // Handle tuple literal (including empty tuple from ORDER BY ()) + exprs, _ := lit.Value.([]ast.Expression) + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + if len(exprs) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + Node(sb, e, depth+4) + } + } else { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } } else { Node(sb, n.OrderBy[0], depth+2) } @@ -135,6 +147,18 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, if len(n.PrimaryKey) == 1 { if ident, ok := n.PrimaryKey[0].(*ast.Identifier); ok { fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else if lit, ok := n.PrimaryKey[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple { + // Handle tuple literal (including empty tuple from PRIMARY KEY ()) + exprs, _ := lit.Value.([]ast.Expression) + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + if len(exprs) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + Node(sb, e, depth+4) + } + } else { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } } else { Node(sb, n.PrimaryKey[0], depth+2) } @@ -156,12 +180,23 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, } } -func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string) { +func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string, depth int) { // DROP USER has a special output format if n.User != "" { fmt.Fprintf(sb, "%sDROP USER query\n", indent) return } + + // Handle multiple tables: DROP TABLE t1, t2, t3 + if len(n.Tables) > 1 { + fmt.Fprintf(sb, "%sDropQuery (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Tables)) + for _, t := range n.Tables { + Node(sb, t, depth+2) + } + return + } + name := n.Table if n.View != "" { name = n.View diff --git a/lexer/lexer.go b/lexer/lexer.go index 8215853b54..ca33357139 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -270,24 +270,62 @@ func (l *Lexer) readString(quote rune) Item { for !l.eof { if l.ch == quote { - // Check for escaped quote + // Check for escaped quote (e.g., '' becomes ') if l.peekChar() == quote { - sb.WriteRune(l.ch) - l.readChar() - sb.WriteRune(l.ch) - l.readChar() + sb.WriteRune(l.ch) // Write one quote (the escaped result) + l.readChar() // skip first quote + l.readChar() // skip second quote continue } l.readChar() // skip closing quote break } if l.ch == '\\' { - sb.WriteRune(l.ch) - l.readChar() - if !l.eof { - sb.WriteRune(l.ch) + l.readChar() // consume backslash + if l.eof { + break + } + // Interpret escape sequence + switch l.ch { + case '\'': + sb.WriteRune('\'') + case '"': + sb.WriteRune('"') + case '\\': + sb.WriteRune('\\') + case 'n': + sb.WriteRune('\n') + case 't': + sb.WriteRune('\t') + case 'r': + sb.WriteRune('\r') + case '0': + sb.WriteRune('\x00') + case 'b': + sb.WriteRune('\b') + case 'f': + sb.WriteRune('\f') + case 'x': + // Hex escape: \xNN l.readChar() + if l.eof { + break + } + hex1 := l.ch + l.readChar() + if l.eof { + sb.WriteRune(rune(hexValue(hex1))) + continue + } + hex2 := l.ch + // Convert hex digits to byte + val := hexValue(hex1)*16 + hexValue(hex2) + sb.WriteByte(byte(val)) + default: + // Unknown escape, just write the character after backslash + sb.WriteRune(l.ch) } + l.readChar() continue } sb.WriteRune(l.ch) @@ -428,6 +466,19 @@ func isHexDigit(ch rune) bool { return unicode.IsDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') } +func hexValue(ch rune) int { + if ch >= '0' && ch <= '9' { + return int(ch - '0') + } + if ch >= 'a' && ch <= 'f' { + return int(ch-'a') + 10 + } + if ch >= 'A' && ch <= 'F' { + return int(ch-'A') + 10 + } + return 0 +} + func (l *Lexer) readIdentifier() Item { pos := l.pos var sb strings.Builder diff --git a/parser/parser.go b/parser/parser.go index 9bc1c93524..2825ad6a7c 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1095,9 +1095,21 @@ func (p *Parser) parseCreateTable(create *ast.CreateQuery) { p.nextToken() if p.expect(token.BY) { if p.currentIs(token.LPAREN) { + pos := p.current.Pos p.nextToken() - create.OrderBy = p.parseExpressionList() + exprs := p.parseExpressionList() p.expect(token.RPAREN) + // Store tuple literal for ORDER BY (expr1, expr2, ...) or ORDER BY () + if len(exprs) == 0 || len(exprs) > 1 { + create.OrderBy = []ast.Expression{&ast.Literal{ + Position: pos, + Type: ast.LiteralTuple, + Value: exprs, + }} + } else { + // Single expression in parentheses - just extract it + create.OrderBy = exprs + } } else { create.OrderBy = []ast.Expression{p.parseExpression(LOWEST)} } @@ -1106,9 +1118,21 @@ func (p *Parser) parseCreateTable(create *ast.CreateQuery) { p.nextToken() if p.expect(token.KEY) { if p.currentIs(token.LPAREN) { + pos := p.current.Pos p.nextToken() - create.PrimaryKey = p.parseExpressionList() + exprs := p.parseExpressionList() p.expect(token.RPAREN) + // Store tuple literal for PRIMARY KEY (expr1, expr2, ...) or PRIMARY KEY () + if len(exprs) == 0 || len(exprs) > 1 { + create.PrimaryKey = []ast.Expression{&ast.Literal{ + Position: pos, + Type: ast.LiteralTuple, + Value: exprs, + }} + } else { + // Single expression in parentheses - just extract it + create.PrimaryKey = exprs + } } else { create.PrimaryKey = []ast.Expression{p.parseExpression(LOWEST)} } @@ -1545,37 +1569,52 @@ func (p *Parser) parseDrop() *ast.DropQuery { // Parse name (can start with a number in ClickHouse) name := p.parseIdentifierName() if name != "" { + var database, tableName string if p.currentIs(token.DOT) { p.nextToken() - drop.Database = name - tableName := p.parseIdentifierName() - if tableName != "" { - if drop.DropDatabase { - drop.Database = tableName - } else { - drop.Table = tableName - } - } + database = name + tableName = p.parseIdentifierName() } else { - if dropUser { - drop.User = name - } else if drop.DropDatabase { - drop.Database = name - } else { - drop.Table = name + tableName = name + } + + if dropUser { + drop.User = tableName + } else if drop.DropDatabase { + drop.Database = tableName + } else { + // First table - add to Tables list + drop.Tables = append(drop.Tables, &ast.TableIdentifier{ + Position: drop.Position, + Database: database, + Table: tableName, + }) + drop.Table = tableName // Keep for backward compatibility + if database != "" { + drop.Database = database } } } // Handle multiple tables (DROP TABLE IF EXISTS t1, t2, t3) - // For now, just skip additional table names for p.currentIs(token.COMMA) { p.nextToken() - // Skip the table name (may be qualified like db.table) - p.parseIdentifierName() + pos := p.current.Pos + name := p.parseIdentifierName() + var database, tableName string if p.currentIs(token.DOT) { p.nextToken() - p.parseIdentifierName() + database = name + tableName = p.parseIdentifierName() + } else { + tableName = name + } + if tableName != "" { + drop.Tables = append(drop.Tables, &ast.TableIdentifier{ + Position: pos, + Database: database, + Table: tableName, + }) } }