Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 9 additions & 37 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

## Current State

- **Tests passing:** 5,933 (86.9%)
- **Tests skipped:** 891 (13.1%)
- **Tests passing:** 6,006 (88.0%)
- **Tests skipped:** 819 (12.0%)

## Recently Fixed (explain layer)

Expand All @@ -18,27 +18,15 @@
- ✅ Aliased expression handling for binary/unary/function/identifier
- ✅ PARTITION BY support in CREATE TABLE
- ✅ Server error message stripping from expected output
- ✅ DROP TABLE with multiple tables (e.g., `DROP TABLE t1, t2, t3`)
- ✅ Negative integer/float literals (e.g., `-1` → `Literal Int64_-1`)
- ✅ Empty tuple in ORDER BY (e.g., `ORDER BY ()` → `Function tuple` with empty `ExpressionList`)
- ✅ String escape handling (lexer now unescapes `\'`, `\\`, `\n`, `\t`, `\0`, etc.)

## Parser Issues (High Priority)

These require changes to `parser/parser.go`:

### DROP TABLE with Multiple Tables
Parser only captures first table when multiple are specified:
```sql
DROP TABLE IF EXISTS t1, t2, t3;
-- Expected: ExpressionList with 3 TableIdentifiers
-- Got: Single Identifier for t1
```

### Negative Integer Literals
Negative numbers are parsed as `Function negate` instead of negative literals:
```sql
SELECT -1, -10000;
-- Expected: Literal Int64_-1
-- Got: Function negate (children 1) with Literal UInt64_1
```

### CREATE TABLE with INDEX Clause
INDEX definitions in CREATE TABLE are not captured:
```sql
Expand All @@ -59,22 +47,6 @@ CREATE TABLE t (c Int TTL expr()) ENGINE=MergeTree;
-- Expected: ColumnDeclaration with 2 children (type + TTL function)
```

### Empty Tuple in ORDER BY
`ORDER BY ()` should capture empty tuple expression:
```sql
CREATE TABLE t (...) ENGINE=MergeTree ORDER BY ();
-- Expected: Function tuple (children 1) with empty ExpressionList
-- Got: Storage definition with no ORDER BY
```

### String Escape Handling
Parser stores escaped characters literally instead of unescaping:
```sql
SELECT 'x\'e2\'';
-- Parser stores: x\'e2\' (with backslashes)
-- Should store: x'e2' (unescaped)
```

## Parser Issues (Medium Priority)

### CREATE DICTIONARY
Expand Down Expand Up @@ -163,11 +135,11 @@ SELECT 2.2250738585072014e-308;
```

### Array Literals with Negative Numbers
Arrays with negative integers expand to Function instead of Literal:
Arrays with negative integers may still expand to Function instead of Literal in some cases:
```sql
SELECT [-10000, 5750];
-- Expected: Literal Array_[Int64_-10000, UInt64_5750]
-- Got: Function array with Function negate for -10000
-- Some cases now work correctly with Literal Int64_-10000
-- Complex nested arrays may still require additional work
```

### WithElement for CTE Subqueries
Expand Down
21 changes: 11 additions & 10 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,16 +328,17 @@ func (t *TTLClause) End() token.Position { return t.Position }

// DropQuery represents a DROP statement.
type DropQuery struct {
Position token.Position `json:"-"`
IfExists bool `json:"if_exists,omitempty"`
Database string `json:"database,omitempty"`
Table string `json:"table,omitempty"`
View string `json:"view,omitempty"`
User string `json:"user,omitempty"`
Temporary bool `json:"temporary,omitempty"`
OnCluster string `json:"on_cluster,omitempty"`
DropDatabase bool `json:"drop_database,omitempty"`
Sync bool `json:"sync,omitempty"`
Position token.Position `json:"-"`
IfExists bool `json:"if_exists,omitempty"`
Database string `json:"database,omitempty"`
Table string `json:"table,omitempty"`
Tables []*TableIdentifier `json:"tables,omitempty"` // For DROP TABLE t1, t2, t3
View string `json:"view,omitempty"`
User string `json:"user,omitempty"`
Temporary bool `json:"temporary,omitempty"`
OnCluster string `json:"on_cluster,omitempty"`
DropDatabase bool `json:"drop_database,omitempty"`
Sync bool `json:"sync,omitempty"`
}

func (d *DropQuery) Pos() token.Position { return d.Position }
Expand Down
2 changes: 1 addition & 1 deletion internal/explain/explain.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func Node(sb *strings.Builder, node interface{}, depth int) {
case *ast.CreateQuery:
explainCreateQuery(sb, n, indent, depth)
case *ast.DropQuery:
explainDropQuery(sb, n, indent)
explainDropQuery(sb, n, indent, depth)
case *ast.SetQuery:
explainSetQuery(sb, indent)
case *ast.SystemQuery:
Expand Down
23 changes: 23 additions & 0 deletions internal/explain/expressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,29 @@ func collectConcatOperands(n *ast.BinaryExpr) []ast.Expression {
}

func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, depth int) {
// Handle negate of literal numbers - output as negative literal instead of function
if n.Op == "-" {
if lit, ok := n.Operand.(*ast.Literal); ok {
switch lit.Type {
case ast.LiteralInteger:
// Convert positive integer to negative
switch val := lit.Value.(type) {
case int64:
fmt.Fprintf(sb, "%sLiteral Int64_%d\n", indent, -val)
return
case uint64:
fmt.Fprintf(sb, "%sLiteral Int64_-%d\n", indent, val)
return
}
case ast.LiteralFloat:
val := lit.Value.(float64)
s := FormatFloat(-val)
fmt.Fprintf(sb, "%sLiteral Float64_%s\n", indent, s)
return
}
}
}

fnName := UnaryOperatorToFunction(n.Op)
fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1)
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1)
Expand Down
43 changes: 38 additions & 5 deletions internal/explain/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,41 @@ import (
"github.com/kyleconroy/doubleclick/ast"
)

// FormatFloat formats a float value for EXPLAIN AST output
func FormatFloat(val float64) string {
// Use 'f' format to avoid scientific notation, -1 precision for smallest representation
return strconv.FormatFloat(val, 'f', -1, 64)
}

// escapeStringLiteral escapes special characters in a string for EXPLAIN AST output
// Uses double-escaping as ClickHouse EXPLAIN AST displays strings
func escapeStringLiteral(s string) string {
var sb strings.Builder
for _, r := range s {
switch r {
case '\\':
sb.WriteString("\\\\\\\\") // backslash becomes four backslashes (\\\\)
case '\'':
sb.WriteString("\\'")
case '\n':
sb.WriteString("\\\\n") // newline becomes \\n
case '\t':
sb.WriteString("\\\\t") // tab becomes \\t
case '\r':
sb.WriteString("\\\\r") // carriage return becomes \\r
case '\x00':
sb.WriteString("\\\\0") // null becomes \\0
case '\b':
sb.WriteString("\\\\b") // backspace becomes \\b
case '\f':
sb.WriteString("\\\\f") // form feed becomes \\f
default:
sb.WriteRune(r)
}
}
return sb.String()
}

// FormatLiteral formats a literal value for EXPLAIN AST output
func FormatLiteral(lit *ast.Literal) string {
switch lit.Type {
Expand All @@ -26,13 +61,11 @@ func FormatLiteral(lit *ast.Literal) string {
}
case ast.LiteralFloat:
val := lit.Value.(float64)
// Use 'f' format to avoid scientific notation, -1 precision for smallest representation
s := strconv.FormatFloat(val, 'f', -1, 64)
return fmt.Sprintf("Float64_%s", s)
return fmt.Sprintf("Float64_%s", FormatFloat(val))
case ast.LiteralString:
s := lit.Value.(string)
// Escape backslashes in strings
s = strings.ReplaceAll(s, "\\", "\\\\")
// Escape special characters for display
s = escapeStringLiteral(s)
return fmt.Sprintf("\\'%s\\'", s)
case ast.LiteralBoolean:
if lit.Value.(bool) {
Expand Down
37 changes: 36 additions & 1 deletion internal/explain/statements.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,18 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
if len(n.OrderBy) == 1 {
if ident, ok := n.OrderBy[0].(*ast.Identifier); ok {
fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name())
} else if lit, ok := n.OrderBy[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple {
// Handle tuple literal (including empty tuple from ORDER BY ())
exprs, _ := lit.Value.([]ast.Expression)
fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1)
if len(exprs) > 0 {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs))
for _, e := range exprs {
Node(sb, e, depth+4)
}
} else {
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
}
} else {
Node(sb, n.OrderBy[0], depth+2)
}
Expand All @@ -135,6 +147,18 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
if len(n.PrimaryKey) == 1 {
if ident, ok := n.PrimaryKey[0].(*ast.Identifier); ok {
fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name())
} else if lit, ok := n.PrimaryKey[0].(*ast.Literal); ok && lit.Type == ast.LiteralTuple {
// Handle tuple literal (including empty tuple from PRIMARY KEY ())
exprs, _ := lit.Value.([]ast.Expression)
fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1)
if len(exprs) > 0 {
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs))
for _, e := range exprs {
Node(sb, e, depth+4)
}
} else {
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
}
} else {
Node(sb, n.PrimaryKey[0], depth+2)
}
Expand All @@ -156,12 +180,23 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string,
}
}

func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string) {
func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string, depth int) {
// DROP USER has a special output format
if n.User != "" {
fmt.Fprintf(sb, "%sDROP USER query\n", indent)
return
}

// Handle multiple tables: DROP TABLE t1, t2, t3
if len(n.Tables) > 1 {
fmt.Fprintf(sb, "%sDropQuery (children %d)\n", indent, 1)
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Tables))
for _, t := range n.Tables {
Node(sb, t, depth+2)
}
return
}

name := n.Table
if n.View != "" {
name = n.View
Expand Down
69 changes: 60 additions & 9 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,24 +270,62 @@ func (l *Lexer) readString(quote rune) Item {

for !l.eof {
if l.ch == quote {
// Check for escaped quote
// Check for escaped quote (e.g., '' becomes ')
if l.peekChar() == quote {
sb.WriteRune(l.ch)
l.readChar()
sb.WriteRune(l.ch)
l.readChar()
sb.WriteRune(l.ch) // Write one quote (the escaped result)
l.readChar() // skip first quote
l.readChar() // skip second quote
continue
}
l.readChar() // skip closing quote
break
}
if l.ch == '\\' {
sb.WriteRune(l.ch)
l.readChar()
if !l.eof {
sb.WriteRune(l.ch)
l.readChar() // consume backslash
if l.eof {
break
}
// Interpret escape sequence
switch l.ch {
case '\'':
sb.WriteRune('\'')
case '"':
sb.WriteRune('"')
case '\\':
sb.WriteRune('\\')
case 'n':
sb.WriteRune('\n')
case 't':
sb.WriteRune('\t')
case 'r':
sb.WriteRune('\r')
case '0':
sb.WriteRune('\x00')
case 'b':
sb.WriteRune('\b')
case 'f':
sb.WriteRune('\f')
case 'x':
// Hex escape: \xNN
l.readChar()
if l.eof {
break
}
hex1 := l.ch
l.readChar()
if l.eof {
sb.WriteRune(rune(hexValue(hex1)))
continue
}
hex2 := l.ch
// Convert hex digits to byte
val := hexValue(hex1)*16 + hexValue(hex2)
sb.WriteByte(byte(val))
default:
// Unknown escape, just write the character after backslash
sb.WriteRune(l.ch)
}
l.readChar()
continue
}
sb.WriteRune(l.ch)
Expand Down Expand Up @@ -428,6 +466,19 @@ func isHexDigit(ch rune) bool {
return unicode.IsDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')
}

func hexValue(ch rune) int {
if ch >= '0' && ch <= '9' {
return int(ch - '0')
}
if ch >= 'a' && ch <= 'f' {
return int(ch-'a') + 10
}
if ch >= 'A' && ch <= 'F' {
return int(ch-'A') + 10
}
return 0
}

func (l *Lexer) readIdentifier() Item {
pos := l.pos
var sb strings.Builder
Expand Down
Loading