Skip to content
Merged
136 changes: 136 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# TODO: Remaining Parser and Explain Issues

## Current State

- **Tests passing:** 5,197 (76.2%)
- **Tests skipped:** 1,627 (23.8%)
- Parser issues: ~675
- Explain mismatches: ~637

## Parser Issues

These require changes to `parser/parser.go`:

### Table/Database Names Starting with Numbers
Tables and databases with names starting with digits fail to parse:
```sql
DROP TABLE IF EXISTS 03657_gby_overflow;
DROP DATABASE IF EXISTS 03710_database;
```

### FORMAT Null
The `FORMAT Null` clause is not recognized:
```sql
SELECT ... FORMAT Null;
```

### FETCH FIRST ... ROW ONLY
SQL standard fetch syntax is not supported:
```sql
SELECT ... FETCH FIRST 1 ROW ONLY;
```

### INSERT INTO FUNCTION
Function-based inserts are not supported:
```sql
INSERT INTO FUNCTION file('file.parquet') SELECT ...;
```

### WITH ... AS Subquery Aliases
Subquery aliases in FROM clauses with keyword `AS`:
```sql
SELECT * FROM (SELECT 1 x) AS alias;
```

### String Concatenation Operator ||
The `||` string concatenation operator is not parsed in some contexts:
```sql
SELECT currentDatabase() || '_test' AS key;
```

### MOD/DIV Operators
The `MOD` and `DIV` keywords are not recognized when used as infix operators:
```sql
SELECT number MOD 3, number DIV 3 FROM ...;
```

### Reserved Keyword Handling
Reserved keywords such as `LEFT` and `RIGHT` are not accepted as table aliases:
```sql
SELECT * FROM numbers(10) AS left RIGHT JOIN ...;
```

### Parameterized Settings
Settings with `$` parameters:
```sql
SET param_$1 = 'Hello';
```

### Incomplete CASE Expression
CASE without END:
```sql
SELECT CASE number -- missing END
```

## Explain Output Issues

These require changes to `internal/explain/`:

### Double Equals (==) Operator
The `==` operator creates extra nested equals/tuple nodes:
```sql
SELECT value == '127.0.0.1:9181'
```
Expected: `Function equals` with `Identifier` and `Literal`
Got: Nested `Function equals` with extra `Function tuple`

### CreateQuery Spacing
Some ClickHouse versions output extra space before `(children`:
```
CreateQuery d1  (children 1)  -- two spaces
CreateQuery d1 (children 1) -- one space (our output)
```

### Server Error Messages in Expected Output
Some test expected outputs include trailing messages:
```
The query succeeded but the server error '42' was expected
```
These are not part of the actual EXPLAIN output.

## Lower Priority

### DateTime64 with Timezone
Type parameters with string timezone:
```sql
DateTime64(3,'UTC')
```

### Complex Type Expressions
Nested type expressions in column definitions:
```sql
CREATE TABLE t (c LowCardinality(UUID));
```

### Parameterized Views
View definitions with parameters:
```sql
CREATE VIEW v AS SELECT ... WHERE x={parity:Int8};
```

## Testing Notes

Run the tests with a timeout to catch infinite loops:
```bash
go test ./parser -timeout 5s -v
```

Count test results:
```bash
go test ./parser -timeout 5s -v 2>&1 | grep -E 'PASS:|SKIP:' | cut -d':' -f1 | sort | uniq -c
```

View explain mismatches:
```bash
go test ./parser -timeout 5s -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100
```
2 changes: 2 additions & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ type DropQuery struct {
Database string `json:"database,omitempty"`
Table string `json:"table,omitempty"`
View string `json:"view,omitempty"`
User string `json:"user,omitempty"`
Temporary bool `json:"temporary,omitempty"`
OnCluster string `json:"on_cluster,omitempty"`
DropDatabase bool `json:"drop_database,omitempty"`
Expand Down Expand Up @@ -449,6 +450,7 @@ const (
ShowCreateDB ShowType = "CREATE_DATABASE"
ShowColumns ShowType = "COLUMNS"
ShowDictionaries ShowType = "DICTIONARIES"
ShowFunctions ShowType = "FUNCTIONS"
)

// ExplainQuery represents an EXPLAIN statement.
Expand Down
173 changes: 173 additions & 0 deletions internal/explain/explain.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Package explain provides EXPLAIN AST output functionality for ClickHouse SQL.
package explain

import (
"fmt"
"strings"

"github.com/kyleconroy/doubleclick/ast"
)

// Explain renders stmt as EXPLAIN AST text, matching ClickHouse's format.
//
// The statement is written through Node starting at depth 0, and the
// accumulated text is returned.
func Explain(stmt ast.Statement) string {
	out := new(strings.Builder)
	Node(out, stmt, 0)
	return out.String()
}

// Node writes the EXPLAIN AST rendering of node to sb.
//
// depth is the nesting level of the node; each line it emits is prefixed with
// one space per level. A nil node is rendered as a tuple function wrapping an
// empty ExpressionList. Every AST type listed in the switch is forwarded to
// its dedicated explain* helper; any other type is printed as its Go type
// name on a single line.
func Node(sb *strings.Builder, node interface{}, depth int) {
	// The prefix is the same for the nil case and the dispatch below, so it
	// is computed once up front.
	indent := strings.Repeat(" ", depth)

	if node == nil {
		// nil can represent an empty tuple in function arguments.
		fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1)
		fmt.Fprintf(sb, "%s ExpressionList\n", indent)
		return
	}

	switch v := node.(type) {
	// Select statements.
	case *ast.SelectWithUnionQuery:
		explainSelectWithUnionQuery(sb, v, indent, depth)
	case *ast.SelectQuery:
		explainSelectQuery(sb, v, indent, depth)

	// Tables.
	case *ast.TablesInSelectQuery:
		explainTablesInSelectQuery(sb, v, indent, depth)
	case *ast.TablesInSelectQueryElement:
		explainTablesInSelectQueryElement(sb, v, indent, depth)
	case *ast.TableExpression:
		explainTableExpression(sb, v, indent, depth)
	case *ast.TableIdentifier:
		explainTableIdentifier(sb, v, indent)
	case *ast.ArrayJoinClause:
		explainArrayJoinClause(sb, v, indent, depth)
	case *ast.TableJoin:
		explainTableJoin(sb, v, indent, depth)

	// Expressions.
	case *ast.OrderByElement:
		explainOrderByElement(sb, v, indent, depth)
	case *ast.Identifier:
		explainIdentifier(sb, v, indent)
	case *ast.Literal:
		explainLiteral(sb, v, indent, depth)
	case *ast.BinaryExpr:
		explainBinaryExpr(sb, v, indent, depth)
	case *ast.UnaryExpr:
		explainUnaryExpr(sb, v, indent, depth)
	case *ast.Subquery:
		explainSubquery(sb, v, indent, depth)
	case *ast.AliasedExpr:
		// This helper takes only the depth, not a precomputed indent.
		explainAliasedExpr(sb, v, depth)
	case *ast.Asterisk:
		explainAsterisk(sb, v, indent)

	// Functions and function-like expressions.
	case *ast.FunctionCall:
		explainFunctionCall(sb, v, indent, depth)
	case *ast.Lambda:
		explainLambda(sb, v, indent, depth)
	case *ast.CastExpr:
		explainCastExpr(sb, v, indent, depth)
	case *ast.InExpr:
		explainInExpr(sb, v, indent, depth)
	case *ast.TernaryExpr:
		explainTernaryExpr(sb, v, indent, depth)
	case *ast.ArrayAccess:
		explainArrayAccess(sb, v, indent, depth)
	case *ast.TupleAccess:
		explainTupleAccess(sb, v, indent, depth)
	case *ast.LikeExpr:
		explainLikeExpr(sb, v, indent, depth)
	case *ast.BetweenExpr:
		explainBetweenExpr(sb, v, indent, depth)
	case *ast.IsNullExpr:
		explainIsNullExpr(sb, v, indent, depth)
	case *ast.CaseExpr:
		explainCaseExpr(sb, v, indent, depth)
	case *ast.IntervalExpr:
		explainIntervalExpr(sb, v, indent, depth)
	case *ast.ExistsExpr:
		explainExistsExpr(sb, v, indent, depth)
	case *ast.ExtractExpr:
		explainExtractExpr(sb, v, indent, depth)

	// DDL and utility statements.
	case *ast.CreateQuery:
		explainCreateQuery(sb, v, indent, depth)
	case *ast.DropQuery:
		explainDropQuery(sb, v, indent)
	case *ast.SetQuery:
		// The SET helper does not receive the node itself.
		explainSetQuery(sb, indent)
	case *ast.SystemQuery:
		// The SYSTEM helper does not receive the node itself.
		explainSystemQuery(sb, indent)
	case *ast.ExplainQuery:
		explainExplainQuery(sb, v, indent, depth)
	case *ast.ShowQuery:
		explainShowQuery(sb, v, indent)
	case *ast.UseQuery:
		explainUseQuery(sb, v, indent)
	case *ast.DescribeQuery:
		explainDescribeQuery(sb, v, indent)

	// Types.
	case *ast.DataType:
		explainDataType(sb, v, indent, depth)
	case *ast.Parameter:
		explainParameter(sb, v, indent)

	default:
		// Unhandled types fall back to printing the Go type name.
		fmt.Fprintf(sb, "%s%T\n", indent, node)
	}
}

// TablesWithArrayJoin writes FROM and ARRAY JOIN together as a single
// TablesInSelectQuery node.
//
// The child count in the header is the number of tables in from (zero when
// from is nil) plus one when arrayJoin is present. Tables are written first at
// depth+1. The ARRAY JOIN clause, if any, follows wrapped in its own
// TablesInSelectQueryElement line, with the clause itself at depth+2.
func TablesWithArrayJoin(sb *strings.Builder, from *ast.TablesInSelectQuery, arrayJoin *ast.ArrayJoinClause, depth int) {
	prefix := strings.Repeat(" ", depth)

	hasFrom := from != nil
	hasArrayJoin := arrayJoin != nil

	children := 0
	if hasFrom {
		children += len(from.Tables)
	}
	if hasArrayJoin {
		children++
	}

	fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", prefix, children)

	if hasFrom {
		for _, tbl := range from.Tables {
			Node(sb, tbl, depth+1)
		}
	}

	if !hasArrayJoin {
		return
	}

	// ARRAY JOIN is wrapped in a TablesInSelectQueryElement one level deeper.
	fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", prefix, 1)
	Node(sb, arrayJoin, depth+2)
}

// Column writes a ColumnDeclaration node for col to sb.
//
// The header line carries the column name and the number of children, which
// counts the type and the default expression when each is non-nil. The type is
// written before the default, both at depth+1.
func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
	prefix := strings.Repeat(" ", depth)

	hasType := col.Type != nil
	hasDefault := col.Default != nil

	count := 0
	if hasType {
		count++
	}
	if hasDefault {
		count++
	}

	fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", prefix, col.Name, count)

	if hasType {
		Node(sb, col.Type, depth+1)
	}
	if hasDefault {
		Node(sb, col.Default, depth+1)
	}
}
Loading