diff --git a/ast/ast.go b/ast/ast.go deleted file mode 100644 index af3874c2..00000000 --- a/ast/ast.go +++ /dev/null @@ -1,271 +0,0 @@ -// Package ast defines the AST types for T-SQL parsing. -package ast - -// Node is the interface implemented by all AST nodes. -type Node interface { - node() -} - -// Script is the root AST node representing a T-SQL script. -type Script struct { - Batches []*Batch `json:"Batches,omitempty"` -} - -func (*Script) node() {} - -// Batch represents a T-SQL batch of statements. -type Batch struct { - Statements []Statement `json:"Statements,omitempty"` -} - -func (*Batch) node() {} - -// Statement is the interface implemented by all statement types. -type Statement interface { - Node - statement() -} - -// SelectStatement represents a SELECT statement. -type SelectStatement struct { - QueryExpression QueryExpression `json:"QueryExpression,omitempty"` -} - -func (*SelectStatement) node() {} -func (*SelectStatement) statement() {} - -// QueryExpression is the interface for query expressions. -type QueryExpression interface { - Node - queryExpression() -} - -// QuerySpecification represents a query specification (SELECT ... FROM ...). -type QuerySpecification struct { - UniqueRowFilter string `json:"UniqueRowFilter,omitempty"` - SelectElements []SelectElement `json:"SelectElements,omitempty"` - FromClause *FromClause `json:"FromClause,omitempty"` - WhereClause *WhereClause `json:"WhereClause,omitempty"` - GroupByClause *GroupByClause `json:"GroupByClause,omitempty"` - HavingClause *HavingClause `json:"HavingClause,omitempty"` - OrderByClause *OrderByClause `json:"OrderByClause,omitempty"` -} - -func (*QuerySpecification) node() {} -func (*QuerySpecification) queryExpression() {} - -// SelectElement is the interface for select list elements. -type SelectElement interface { - Node - selectElement() -} - -// SelectScalarExpression represents a scalar expression in a select list. 
-type SelectScalarExpression struct { - Expression ScalarExpression `json:"Expression,omitempty"` - ColumnName *IdentifierOrValueExpression `json:"ColumnName,omitempty"` -} - -func (*SelectScalarExpression) node() {} -func (*SelectScalarExpression) selectElement() {} - -// SelectStarExpression represents SELECT *. -type SelectStarExpression struct { - Qualifier *MultiPartIdentifier `json:"Qualifier,omitempty"` -} - -func (*SelectStarExpression) node() {} -func (*SelectStarExpression) selectElement() {} - -// ScalarExpression is the interface for scalar expressions. -type ScalarExpression interface { - Node - scalarExpression() -} - -// ColumnReferenceExpression represents a column reference. -type ColumnReferenceExpression struct { - ColumnType string `json:"ColumnType,omitempty"` - MultiPartIdentifier *MultiPartIdentifier `json:"MultiPartIdentifier,omitempty"` -} - -func (*ColumnReferenceExpression) node() {} -func (*ColumnReferenceExpression) scalarExpression() {} - -// IntegerLiteral represents an integer literal. -type IntegerLiteral struct { - LiteralType string `json:"LiteralType,omitempty"` - Value string `json:"Value,omitempty"` -} - -func (*IntegerLiteral) node() {} -func (*IntegerLiteral) scalarExpression() {} - -// StringLiteral represents a string literal. -type StringLiteral struct { - LiteralType string `json:"LiteralType,omitempty"` - IsNational bool `json:"IsNational,omitempty"` - IsLargeObject bool `json:"IsLargeObject,omitempty"` - Value string `json:"Value,omitempty"` -} - -func (*StringLiteral) node() {} -func (*StringLiteral) scalarExpression() {} - -// FunctionCall represents a function call. 
-type FunctionCall struct { - FunctionName *Identifier `json:"FunctionName,omitempty"` - Parameters []ScalarExpression `json:"Parameters,omitempty"` - UniqueRowFilter string `json:"UniqueRowFilter,omitempty"` - WithArrayWrapper bool `json:"WithArrayWrapper,omitempty"` -} - -func (*FunctionCall) node() {} -func (*FunctionCall) scalarExpression() {} - -// Identifier represents an identifier. -type Identifier struct { - Value string `json:"Value,omitempty"` - QuoteType string `json:"QuoteType,omitempty"` -} - -func (*Identifier) node() {} - -// MultiPartIdentifier represents a multi-part identifier (e.g., schema.table.column). -type MultiPartIdentifier struct { - Count int `json:"Count,omitempty"` - Identifiers []*Identifier `json:"Identifiers,omitempty"` -} - -func (*MultiPartIdentifier) node() {} - -// IdentifierOrValueExpression represents either an identifier or a value expression. -type IdentifierOrValueExpression struct { - Value string `json:"Value,omitempty"` - Identifier *Identifier `json:"Identifier,omitempty"` -} - -func (*IdentifierOrValueExpression) node() {} - -// FromClause represents a FROM clause. -type FromClause struct { - TableReferences []TableReference `json:"TableReferences,omitempty"` -} - -func (*FromClause) node() {} - -// TableReference is the interface for table references. -type TableReference interface { - Node - tableReference() -} - -// NamedTableReference represents a named table reference. -type NamedTableReference struct { - SchemaObject *SchemaObjectName `json:"SchemaObject,omitempty"` - Alias *Identifier `json:"Alias,omitempty"` - ForPath bool `json:"ForPath,omitempty"` -} - -func (*NamedTableReference) node() {} -func (*NamedTableReference) tableReference() {} - -// SchemaObjectName represents a schema object name. 
-type SchemaObjectName struct { - BaseIdentifier *Identifier `json:"BaseIdentifier,omitempty"` - Count int `json:"Count,omitempty"` - Identifiers []*Identifier `json:"Identifiers,omitempty"` -} - -func (*SchemaObjectName) node() {} - -// QualifiedJoin represents a qualified join. -type QualifiedJoin struct { - SearchCondition BooleanExpression `json:"SearchCondition,omitempty"` - QualifiedJoinType string `json:"QualifiedJoinType,omitempty"` - JoinHint string `json:"JoinHint,omitempty"` - FirstTableReference TableReference `json:"FirstTableReference,omitempty"` - SecondTableReference TableReference `json:"SecondTableReference,omitempty"` -} - -func (*QualifiedJoin) node() {} -func (*QualifiedJoin) tableReference() {} - -// WhereClause represents a WHERE clause. -type WhereClause struct { - SearchCondition BooleanExpression `json:"SearchCondition,omitempty"` -} - -func (*WhereClause) node() {} - -// BooleanExpression is the interface for boolean expressions. -type BooleanExpression interface { - Node - booleanExpression() -} - -// BooleanComparisonExpression represents a comparison expression. -type BooleanComparisonExpression struct { - ComparisonType string `json:"ComparisonType,omitempty"` - FirstExpression ScalarExpression `json:"FirstExpression,omitempty"` - SecondExpression ScalarExpression `json:"SecondExpression,omitempty"` -} - -func (*BooleanComparisonExpression) node() {} -func (*BooleanComparisonExpression) booleanExpression() {} - -// BooleanBinaryExpression represents a binary boolean expression (AND, OR). -type BooleanBinaryExpression struct { - BinaryExpressionType string `json:"BinaryExpressionType,omitempty"` - FirstExpression BooleanExpression `json:"FirstExpression,omitempty"` - SecondExpression BooleanExpression `json:"SecondExpression,omitempty"` -} - -func (*BooleanBinaryExpression) node() {} -func (*BooleanBinaryExpression) booleanExpression() {} - -// GroupByClause represents a GROUP BY clause. 
-type GroupByClause struct { - GroupByOption string `json:"GroupByOption,omitempty"` - All bool `json:"All,omitempty"` - GroupingSpecifications []GroupingSpecification `json:"GroupingSpecifications,omitempty"` -} - -func (*GroupByClause) node() {} - -// GroupingSpecification is the interface for grouping specifications. -type GroupingSpecification interface { - Node - groupingSpecification() -} - -// ExpressionGroupingSpecification represents a grouping by expression. -type ExpressionGroupingSpecification struct { - Expression ScalarExpression `json:"Expression,omitempty"` - DistributedAggregation bool `json:"DistributedAggregation,omitempty"` -} - -func (*ExpressionGroupingSpecification) node() {} -func (*ExpressionGroupingSpecification) groupingSpecification() {} - -// HavingClause represents a HAVING clause. -type HavingClause struct { - SearchCondition BooleanExpression `json:"SearchCondition,omitempty"` -} - -func (*HavingClause) node() {} - -// OrderByClause represents an ORDER BY clause. -type OrderByClause struct { - OrderByElements []*ExpressionWithSortOrder `json:"OrderByElements,omitempty"` -} - -func (*OrderByClause) node() {} - -// ExpressionWithSortOrder represents an expression with sort order. -type ExpressionWithSortOrder struct { - SortOrder string `json:"SortOrder,omitempty"` - Expression ScalarExpression `json:"Expression,omitempty"` -} - -func (*ExpressionWithSortOrder) node() {} diff --git a/ast/batch.go b/ast/batch.go new file mode 100644 index 00000000..427b61e1 --- /dev/null +++ b/ast/batch.go @@ -0,0 +1,8 @@ +package ast + +// Batch represents a T-SQL batch of statements. 
+type Batch struct {
+	Statements []Statement `json:"Statements,omitempty"` // in the order the parser appended them
+}
+
+func (*Batch) node() {}
diff --git a/ast/boolean_binary_expression.go b/ast/boolean_binary_expression.go
new file mode 100644
index 00000000..ac9ea9ce
--- /dev/null
+++ b/ast/boolean_binary_expression.go
@@ -0,0 +1,11 @@
+package ast
+
+// BooleanBinaryExpression represents a binary boolean expression (AND, OR); it implements BooleanExpression.
+type BooleanBinaryExpression struct {
+	BinaryExpressionType string            `json:"BinaryExpressionType,omitempty"`
+	FirstExpression      BooleanExpression `json:"FirstExpression,omitempty"`
+	SecondExpression     BooleanExpression `json:"SecondExpression,omitempty"`
+}
+
+func (*BooleanBinaryExpression) node()              {}
+func (*BooleanBinaryExpression) booleanExpression() {}
diff --git a/ast/boolean_comparison_expression.go b/ast/boolean_comparison_expression.go
new file mode 100644
index 00000000..b48fa2fb
--- /dev/null
+++ b/ast/boolean_comparison_expression.go
@@ -0,0 +1,11 @@
+package ast
+
+// BooleanComparisonExpression represents a comparison of two scalar expressions; it implements BooleanExpression.
+type BooleanComparisonExpression struct {
+	ComparisonType   string           `json:"ComparisonType,omitempty"`
+	FirstExpression  ScalarExpression `json:"FirstExpression,omitempty"`
+	SecondExpression ScalarExpression `json:"SecondExpression,omitempty"`
+}
+
+func (*BooleanComparisonExpression) node()              {}
+func (*BooleanComparisonExpression) booleanExpression() {}
diff --git a/ast/boolean_expression.go b/ast/boolean_expression.go
new file mode 100644
index 00000000..47e02442
--- /dev/null
+++ b/ast/boolean_expression.go
@@ -0,0 +1,7 @@
+package ast
+
+// BooleanExpression is the interface for boolean expression nodes; its marker method is unexported.
+type BooleanExpression interface {
+	Node
+	booleanExpression()
+}
diff --git a/ast/column_reference_expression.go b/ast/column_reference_expression.go
new file mode 100644
index 00000000..e4db7e6c
--- /dev/null
+++ b/ast/column_reference_expression.go
@@ -0,0 +1,10 @@
+package ast
+
+// ColumnReferenceExpression represents a column reference; it implements ScalarExpression.
+type ColumnReferenceExpression struct {
+	ColumnType          string               `json:"ColumnType,omitempty"`          // the visible parser code sets "Regular"
+	MultiPartIdentifier *MultiPartIdentifier `json:"MultiPartIdentifier,omitempty"` // the dot-separated name parts
+}
+
+func (*ColumnReferenceExpression) node()             {}
+func (*ColumnReferenceExpression) scalarExpression() {}
diff --git a/ast/expression_grouping_specification.go b/ast/expression_grouping_specification.go
new file mode 100644
index 00000000..7fde8fed
--- /dev/null
+++ b/ast/expression_grouping_specification.go
@@ -0,0 +1,10 @@
+package ast
+
+// ExpressionGroupingSpecification represents a grouping by expression; it implements GroupingSpecification.
+type ExpressionGroupingSpecification struct {
+	Expression             ScalarExpression `json:"Expression,omitempty"`
+	DistributedAggregation bool             `json:"DistributedAggregation,omitempty"`
+}
+
+func (*ExpressionGroupingSpecification) node()                  {}
+func (*ExpressionGroupingSpecification) groupingSpecification() {}
diff --git a/ast/expression_with_sort_order.go b/ast/expression_with_sort_order.go
new file mode 100644
index 00000000..5bdc24ca
--- /dev/null
+++ b/ast/expression_with_sort_order.go
@@ -0,0 +1,9 @@
+package ast
+
+// ExpressionWithSortOrder represents an expression with sort order (an element of OrderByClause).
+type ExpressionWithSortOrder struct {
+	SortOrder  string           `json:"SortOrder,omitempty"`
+	Expression ScalarExpression `json:"Expression,omitempty"`
+}
+
+func (*ExpressionWithSortOrder) node() {}
diff --git a/ast/from_clause.go b/ast/from_clause.go
new file mode 100644
index 00000000..8b6497e2
--- /dev/null
+++ b/ast/from_clause.go
@@ -0,0 +1,8 @@
+package ast
+
+// FromClause represents a FROM clause.
+type FromClause struct {
+	TableReferences []TableReference `json:"TableReferences,omitempty"` // one entry per comma-separated reference
+}
+
+func (*FromClause) node() {}
diff --git a/ast/function_call.go b/ast/function_call.go
new file mode 100644
index 00000000..9fff89db
--- /dev/null
+++ b/ast/function_call.go
@@ -0,0 +1,12 @@
+package ast
+
+// FunctionCall represents a function call; it implements ScalarExpression.
+type FunctionCall struct {
+	FunctionName     *Identifier        `json:"FunctionName,omitempty"`
+	Parameters       []ScalarExpression `json:"Parameters,omitempty"`
+	UniqueRowFilter  string             `json:"UniqueRowFilter,omitempty"`
+	WithArrayWrapper bool               `json:"WithArrayWrapper,omitempty"`
+}
+
+func (*FunctionCall) node()             {}
+func (*FunctionCall) scalarExpression() {}
diff --git a/ast/group_by_clause.go b/ast/group_by_clause.go
new file mode 100644
index 00000000..00cb5392
--- /dev/null
+++ b/ast/group_by_clause.go
@@ -0,0 +1,10 @@
+package ast
+
+// GroupByClause represents a GROUP BY clause.
+type GroupByClause struct {
+	GroupByOption          string                  `json:"GroupByOption,omitempty"`
+	All                    bool                    `json:"All,omitempty"`
+	GroupingSpecifications []GroupingSpecification `json:"GroupingSpecifications,omitempty"`
+}
+
+func (*GroupByClause) node() {}
diff --git a/ast/grouping_specification.go b/ast/grouping_specification.go
new file mode 100644
index 00000000..adc4dc06
--- /dev/null
+++ b/ast/grouping_specification.go
@@ -0,0 +1,7 @@
+package ast
+
+// GroupingSpecification is the interface for grouping specification nodes; its marker method is unexported.
+type GroupingSpecification interface {
+	Node
+	groupingSpecification()
+}
diff --git a/ast/having_clause.go b/ast/having_clause.go
new file mode 100644
index 00000000..ea007f63
--- /dev/null
+++ b/ast/having_clause.go
@@ -0,0 +1,8 @@
+package ast
+
+// HavingClause represents a HAVING clause.
+type HavingClause struct {
+	SearchCondition BooleanExpression `json:"SearchCondition,omitempty"`
+}
+
+func (*HavingClause) node() {}
diff --git a/ast/identifier.go b/ast/identifier.go
new file mode 100644
index 00000000..07248de4
--- /dev/null
+++ b/ast/identifier.go
@@ -0,0 +1,9 @@
+package ast
+
+// Identifier represents a single identifier (one part of a possibly multi-part name).
+type Identifier struct {
+	Value     string `json:"Value,omitempty"`     // the visible parser code stores the token literal as written
+	QuoteType string `json:"QuoteType,omitempty"` // the visible parser code sets "NotQuoted"
+}
+
+func (*Identifier) node() {}
diff --git a/ast/identifier_or_value_expression.go b/ast/identifier_or_value_expression.go
new file mode 100644
index 00000000..49d3bd59
--- /dev/null
+++ b/ast/identifier_or_value_expression.go
@@ -0,0 +1,9 @@
+package ast
+
+// IdentifierOrValueExpression represents either an identifier or a value expression.
+type IdentifierOrValueExpression struct {
+	Value      string      `json:"Value,omitempty"`
+	Identifier *Identifier `json:"Identifier,omitempty"`
+}
+
+func (*IdentifierOrValueExpression) node() {}
diff --git a/ast/integer_literal.go b/ast/integer_literal.go
new file mode 100644
index 00000000..1e1dbc4d
--- /dev/null
+++ b/ast/integer_literal.go
@@ -0,0 +1,10 @@
+package ast
+
+// IntegerLiteral represents an integer literal; it implements ScalarExpression.
+type IntegerLiteral struct {
+	LiteralType string `json:"LiteralType,omitempty"` // the visible parser code sets "Integer"
+	Value       string `json:"Value,omitempty"`       // literal text as written, kept as a string
+}
+
+func (*IntegerLiteral) node()             {}
+func (*IntegerLiteral) scalarExpression() {}
diff --git a/ast/multi_part_identifier.go b/ast/multi_part_identifier.go
new file mode 100644
index 00000000..392f0c35
--- /dev/null
+++ b/ast/multi_part_identifier.go
@@ -0,0 +1,9 @@
+package ast
+
+// MultiPartIdentifier represents a multi-part identifier (e.g., schema.table.column).
+type MultiPartIdentifier struct {
+	Count       int           `json:"Count,omitempty"`       // the parser sets this to len(Identifiers)
+	Identifiers []*Identifier `json:"Identifiers,omitempty"` // name parts in source order
+}
+
+func (*MultiPartIdentifier) node() {}
diff --git a/ast/named_table_reference.go b/ast/named_table_reference.go
new file mode 100644
index 00000000..b6ada740
--- /dev/null
+++ b/ast/named_table_reference.go
@@ -0,0 +1,11 @@
+package ast
+
+// NamedTableReference represents a named table reference; it implements TableReference.
+type NamedTableReference struct {
+	SchemaObject *SchemaObjectName `json:"SchemaObject,omitempty"` // the (possibly multi-part) object name
+	Alias        *Identifier       `json:"Alias,omitempty"`        // optional alias; nil when none was given
+	ForPath      bool              `json:"ForPath,omitempty"`      // the visible parser code sets false
+}
+
+func (*NamedTableReference) node()           {}
+func (*NamedTableReference) tableReference() {}
diff --git a/ast/node.go b/ast/node.go
new file mode 100644
index 00000000..d605e8d9
--- /dev/null
+++ b/ast/node.go
@@ -0,0 +1,7 @@
+// Package ast defines the AST types for T-SQL parsing.
+package ast
+
+// Node is the interface implemented by all AST nodes; its marker method is unexported.
+type Node interface {
+	node()
+}
diff --git a/ast/optimizer_hint.go b/ast/optimizer_hint.go
new file mode 100644
index 00000000..b808976e
--- /dev/null
+++ b/ast/optimizer_hint.go
@@ -0,0 +1,8 @@
+package ast
+
+// OptimizerHint represents an optimizer hint in an OPTION clause.
+type OptimizerHint struct {
+	HintKind string `json:"HintKind,omitempty"`
+}
+
+func (*OptimizerHint) node() {}
diff --git a/ast/order_by_clause.go b/ast/order_by_clause.go
new file mode 100644
index 00000000..1515e569
--- /dev/null
+++ b/ast/order_by_clause.go
@@ -0,0 +1,8 @@
+package ast
+
+// OrderByClause represents an ORDER BY clause.
+type OrderByClause struct {
+	OrderByElements []*ExpressionWithSortOrder `json:"OrderByElements,omitempty"`
+}
+
+func (*OrderByClause) node() {}
diff --git a/ast/qualified_join.go b/ast/qualified_join.go
new file mode 100644
index 00000000..49545145
--- /dev/null
+++ b/ast/qualified_join.go
@@ -0,0 +1,13 @@
+package ast
+
+// QualifiedJoin represents a qualified join; it implements TableReference.
+type QualifiedJoin struct {
+	SearchCondition      BooleanExpression `json:"SearchCondition,omitempty"`
+	QualifiedJoinType    string            `json:"QualifiedJoinType,omitempty"`
+	JoinHint             string            `json:"JoinHint,omitempty"`
+	FirstTableReference  TableReference    `json:"FirstTableReference,omitempty"`
+	SecondTableReference TableReference    `json:"SecondTableReference,omitempty"`
+}
+
+func (*QualifiedJoin) node()           {}
+func (*QualifiedJoin) tableReference() {}
diff --git a/ast/query_expression.go b/ast/query_expression.go
new file mode 100644
index 00000000..def1e6b8
--- /dev/null
+++ b/ast/query_expression.go
@@ -0,0 +1,7 @@
+package ast
+
+// QueryExpression is the interface for query expression nodes; its marker method is unexported.
+type QueryExpression interface {
+	Node
+	queryExpression()
+}
diff --git a/ast/query_specification.go b/ast/query_specification.go
new file mode 100644
index 00000000..fb2a3bc3
--- /dev/null
+++ b/ast/query_specification.go
@@ -0,0 +1,15 @@
+package ast
+
+// QuerySpecification represents a query specification (SELECT ... FROM ...); it implements QueryExpression.
+type QuerySpecification struct {
+	UniqueRowFilter string          `json:"UniqueRowFilter,omitempty"` // the parser sets "NotSpecified", "All" or "Distinct"
+	SelectElements  []SelectElement `json:"SelectElements,omitempty"`  // select list in source order
+	FromClause      *FromClause     `json:"FromClause,omitempty"`      // nil when the query has no FROM
+	WhereClause     *WhereClause    `json:"WhereClause,omitempty"`
+	GroupByClause   *GroupByClause  `json:"GroupByClause,omitempty"`
+	HavingClause    *HavingClause   `json:"HavingClause,omitempty"`
+	OrderByClause   *OrderByClause  `json:"OrderByClause,omitempty"`
+}
+
+func (*QuerySpecification) node()            {}
+func (*QuerySpecification) queryExpression() {}
diff --git a/ast/scalar_expression.go b/ast/scalar_expression.go
new file mode 100644
index 00000000..a8a1e301
--- /dev/null
+++ b/ast/scalar_expression.go
@@ -0,0 +1,7 @@
+package ast
+
+// ScalarExpression is the interface for scalar expression nodes; its marker method is unexported.
+type ScalarExpression interface {
+	Node
+	scalarExpression()
+}
diff --git a/ast/schema_object_name.go b/ast/schema_object_name.go
new file mode 100644
index 00000000..a4894dae
--- /dev/null
+++ b/ast/schema_object_name.go
@@ -0,0 +1,10 @@
+package ast
+
+// SchemaObjectName represents a schema object name.
+type SchemaObjectName struct {
+	BaseIdentifier *Identifier   `json:"BaseIdentifier,omitempty"`
+	Count          int           `json:"Count,omitempty"` // presumably len(Identifiers), as in MultiPartIdentifier; TODO confirm
+	Identifiers    []*Identifier `json:"Identifiers,omitempty"`
+}
+
+func (*SchemaObjectName) node() {}
diff --git a/ast/script.go b/ast/script.go
new file mode 100644
index 00000000..d587ace8
--- /dev/null
+++ b/ast/script.go
@@ -0,0 +1,8 @@
+package ast
+
+// Script is the root AST node representing a T-SQL script.
+type Script struct {
+	Batches []*Batch `json:"Batches,omitempty"` // the parser leaves this nil when no statement was parsed
+}
+
+func (*Script) node() {}
diff --git a/ast/select_element.go b/ast/select_element.go
new file mode 100644
index 00000000..985969fc
--- /dev/null
+++ b/ast/select_element.go
@@ -0,0 +1,7 @@
+package ast
+
+// SelectElement is the interface for select list elements; its marker method is unexported.
+type SelectElement interface {
+	Node
+	selectElement()
+}
diff --git a/ast/select_scalar_expression.go b/ast/select_scalar_expression.go
new file mode 100644
index 00000000..62504003
--- /dev/null
+++ b/ast/select_scalar_expression.go
@@ -0,0 +1,10 @@
+package ast
+
+// SelectScalarExpression represents a scalar expression in a select list; it implements SelectElement.
+type SelectScalarExpression struct {
+	Expression ScalarExpression             `json:"Expression,omitempty"`
+	ColumnName *IdentifierOrValueExpression `json:"ColumnName,omitempty"` // not set by the visible parser code
+}
+
+func (*SelectScalarExpression) node()          {}
+func (*SelectScalarExpression) selectElement() {}
diff --git a/ast/select_star_expression.go b/ast/select_star_expression.go
new file mode 100644
index 00000000..90cd2350
--- /dev/null
+++ b/ast/select_star_expression.go
@@ -0,0 +1,9 @@
+package ast
+
+// SelectStarExpression represents SELECT *; it implements SelectElement.
+type SelectStarExpression struct {
+	Qualifier *MultiPartIdentifier `json:"Qualifier,omitempty"` // not set by the visible parser code
+}
+
+func (*SelectStarExpression) node()          {}
+func (*SelectStarExpression) selectElement() {}
diff --git a/ast/select_statement.go b/ast/select_statement.go
new file mode 100644
index 00000000..36d1b55f
--- /dev/null
+++ b/ast/select_statement.go
@@ -0,0 +1,10 @@
+package ast
+
+// SelectStatement represents a SELECT statement; it implements Statement.
+type SelectStatement struct {
+	QueryExpression QueryExpression  `json:"QueryExpression,omitempty"`
+	OptimizerHints  []*OptimizerHint `json:"OptimizerHints,omitempty"` // filled from the OPTION clause when present
+}
+
+func (*SelectStatement) node()      {}
+func (*SelectStatement) statement() {}
diff --git a/ast/statement.go b/ast/statement.go
new file mode 100644
index 00000000..860bfd1a
--- /dev/null
+++ b/ast/statement.go
@@ -0,0 +1,7 @@
+package ast
+
+// Statement is the interface implemented by all statement types; its marker method is unexported.
+type Statement interface {
+	Node
+	statement()
+}
diff --git a/ast/string_literal.go b/ast/string_literal.go
new file mode 100644
index 00000000..f69ad02b
--- /dev/null
+++ b/ast/string_literal.go
@@ -0,0 +1,12 @@
+package ast
+
+// StringLiteral represents a string literal; it implements ScalarExpression.
+type StringLiteral struct {
+	LiteralType   string `json:"LiteralType,omitempty"`
+	IsNational    bool   `json:"IsNational,omitempty"`
+	IsLargeObject bool   `json:"IsLargeObject,omitempty"`
+	Value         string `json:"Value,omitempty"`
+}
+
+func (*StringLiteral) node()             {}
+func (*StringLiteral) scalarExpression() {}
diff --git a/ast/table_reference.go b/ast/table_reference.go
new file mode 100644
index 00000000..cc056829
--- /dev/null
+++ b/ast/table_reference.go
@@ -0,0 +1,7 @@
+package ast
+
+// TableReference is the interface for table references.
+type TableReference interface {
+	Node
+	tableReference()
+}
diff --git a/ast/where_clause.go b/ast/where_clause.go
new file mode 100644
index 00000000..f71dfaa3
--- /dev/null
+++ b/ast/where_clause.go
@@ -0,0 +1,8 @@
+package ast
+
+// WhereClause represents a WHERE clause.
+type WhereClause struct {
+	SearchCondition BooleanExpression `json:"SearchCondition,omitempty"`
+}
+
+func (*WhereClause) node() {}
diff --git a/go.mod b/go.mod
index 33c335f1..b1a470b3 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,3 @@
 module github.com/kyleconroy/teesql
 
-go 1.25
+go 1.21
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 00000000..3dd8ac7a
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,289 @@
+package parser
+
+import (
+	"strings"
+	"unicode/utf8"
+)
+
+// TokenType represents the type of a token.
+type TokenType int
+
+const (
+	TokenEOF TokenType = iota
+	TokenError
+	TokenIdent
+	TokenNumber
+	TokenString
+	TokenStar
+	TokenComma
+	TokenDot
+	TokenLParen
+	TokenRParen
+	TokenLBracket
+	TokenRBracket
+	TokenSemicolon
+	TokenEquals
+	TokenLessThan
+	TokenGreaterThan
+	TokenPlus
+	TokenMinus
+
+	// Keywords
+	TokenSelect
+	TokenFrom
+	TokenWhere
+	TokenAnd
+	TokenOr
+	TokenAs
+	TokenOption
+	TokenAll
+	TokenDistinct
+)
+
+// Token represents a lexical token.
+type Token struct {
+	Type    TokenType
+	Literal string // source text as written (quotes/brackets included); "" at EOF
+	Pos     int    // byte offset of the token's first byte in the input
+}
+
+// Lexer tokenizes T-SQL input.
+type Lexer struct {
+	input   string
+	pos     int  // byte offset of ch
+	readPos int  // byte offset of the next byte to read
+	ch      byte // current byte; 0 once the input is exhausted
+}
+
+// NewLexer creates a new Lexer for the given input.
+func NewLexer(input string) *Lexer {
+	l := &Lexer{input: input}
+	l.readChar()
+	return l
+}
+
+// punctuation maps each single-byte operator or delimiter to its token type.
+var punctuation = map[byte]TokenType{
+	'*': TokenStar,
+	',': TokenComma,
+	'.': TokenDot,
+	'(': TokenLParen,
+	')': TokenRParen,
+	']': TokenRBracket,
+	';': TokenSemicolon,
+	'=': TokenEquals,
+	'<': TokenLessThan,
+	'>': TokenGreaterThan,
+	'+': TokenPlus,
+	'-': TokenMinus,
+}
+
+// readChar advances to the next input byte; ch becomes 0 at end of input.
+func (l *Lexer) readChar() {
+	if l.readPos >= len(l.input) {
+		l.ch = 0
+	} else {
+		l.ch = l.input[l.readPos]
+	}
+	l.pos = l.readPos
+	l.readPos++
+}
+
+// peekChar returns the byte after ch without consuming it, or 0 at end of input.
+func (l *Lexer) peekChar() byte {
+	if l.readPos >= len(l.input) {
+		return 0
+	}
+	return l.input[l.readPos]
+}
+
+// NextToken returns the next token from the input.
+//
+// Whitespace and comments are skipped first. At end of input it returns a
+// TokenEOF token, and keeps returning one on later calls. A byte that starts
+// no known token is returned as a TokenError token and consumed.
+func (l *Lexer) NextToken() Token {
+	l.skipWhitespaceAndComments()
+
+	switch {
+	case l.ch == 0:
+		return Token{Type: TokenEOF, Pos: l.pos}
+	case l.ch == '[':
+		return l.readBracketedIdentifier()
+	case l.ch == '\'':
+		return l.readString()
+	case isLetter(l.ch) || l.ch == '_' || l.ch == '@' || l.ch == '#':
+		return l.readIdentifier()
+	case isDigit(l.ch):
+		return l.readNumber()
+	}
+
+	// Everything else is a single byte: punctuation if known, otherwise an error.
+	tok := Token{Type: TokenError, Literal: string(l.ch), Pos: l.pos}
+	if typ, ok := punctuation[l.ch]; ok {
+		tok.Type = typ
+	}
+	l.readChar()
+	return tok
+}
+
+// skipWhitespaceAndComments consumes spaces, tabs, newlines, "--" line comments
+// and "/* */" block comments until the next significant byte or end of input.
+// An unterminated block comment extends to end of input.
+func (l *Lexer) skipWhitespaceAndComments() {
+	for {
+		// Skip whitespace
+		for l.ch != 0 && (l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r') {
+			l.readChar()
+		}
+
+		// Skip line comments (-- ...)
+		if l.ch == '-' && l.peekChar() == '-' {
+			for l.ch != 0 && l.ch != '\n' {
+				l.readChar()
+			}
+			continue
+		}
+
+		// Skip block comments (/* ... */)
+		if l.ch == '/' && l.peekChar() == '*' {
+			l.readChar() // skip /
+			l.readChar() // skip *
+			for l.ch != 0 {
+				if l.ch == '*' && l.peekChar() == '/' {
+					l.readChar() // skip *
+					l.readChar() // skip /
+					break
+				}
+				l.readChar()
+			}
+			continue
+		}
+
+		break
+	}
+}
+
+// readIdentifier reads an unquoted identifier (letters, digits, '_', '@', '#')
+// and classifies it as a keyword token when it matches one.
+func (l *Lexer) readIdentifier() Token {
+	startPos := l.pos
+	for isLetter(l.ch) || isDigit(l.ch) || l.ch == '_' || l.ch == '@' || l.ch == '#' {
+		l.readChar()
+	}
+	literal := l.input[startPos:l.pos]
+	return Token{
+		Type:    lookupKeyword(literal),
+		Literal: literal,
+		Pos:     startPos,
+	}
+}
+
+// readBracketedIdentifier reads a [bracket-delimited] identifier starting at
+// the opening '['. A doubled "]]" inside the brackets is an escaped ']' and
+// does not end the identifier. The literal keeps the brackets and escapes as
+// written; if the closing ']' is missing, the literal runs to end of input.
+func (l *Lexer) readBracketedIdentifier() Token {
+	startPos := l.pos
+	l.readChar() // skip opening [
+	for l.ch != 0 {
+		if l.ch == ']' {
+			if l.peekChar() != ']' {
+				l.readChar() // skip closing ]
+				break
+			}
+			l.readChar() // first ] of an escaped ]]
+		}
+		l.readChar()
+	}
+	return Token{
+		Type:    TokenIdent,
+		Literal: l.input[startPos:l.pos],
+		Pos:     startPos,
+	}
+}
+
+// readString reads a single-quoted string literal starting at the opening
+// quote. A doubled '' inside the string is an escaped quote. The literal keeps
+// the quotes as written; if the closing quote is missing, it runs to end of input.
+func (l *Lexer) readString() Token {
+	startPos := l.pos
+	l.readChar() // skip opening quote
+	for l.ch != 0 {
+		if l.ch == '\'' {
+			if l.peekChar() == '\'' {
+				// Escaped quote
+				l.readChar()
+				l.readChar()
+				continue
+			}
+			break
+		}
+		l.readChar()
+	}
+	if l.ch == '\'' {
+		l.readChar() // skip closing quote
+	}
+	return Token{
+		Type:    TokenString,
+		Literal: l.input[startPos:l.pos],
+		Pos:     startPos,
+	}
+}
+
+// readNumber reads an unsigned integer or decimal literal (digits, optionally
+// followed by '.' and more digits). A '.' not followed by a digit is left for
+// the next token.
+func (l *Lexer) readNumber() Token {
+	startPos := l.pos
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+	// Handle decimal point
+	if l.ch == '.' && isDigit(l.peekChar()) {
+		l.readChar()
+		for isDigit(l.ch) {
+			l.readChar()
+		}
+	}
+	return Token{
+		Type:    TokenNumber,
+		Literal: l.input[startPos:l.pos],
+		Pos:     startPos,
+	}
+}
+
+// isLetter reports whether ch may appear in an identifier as a letter: an ASCII
+// letter, or any byte of a multi-byte UTF-8 sequence. The lexer works on bytes,
+// so non-ASCII bytes are accepted as a whole rather than decoded; this keeps an
+// identifier such as "café" in one token instead of splitting it inside a rune.
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch >= utf8.RuneSelf
+}
+
+// isDigit reports whether ch is an ASCII decimal digit.
+func isDigit(ch byte) bool {
+	return ch >= '0' && ch <= '9'
+}
+
+// keywords maps upper-cased keyword spellings to their token types.
+var keywords = map[string]TokenType{
+	"SELECT":   TokenSelect,
+	"FROM":     TokenFrom,
+	"WHERE":    TokenWhere,
+	"AND":      TokenAnd,
+	"OR":       TokenOr,
+	"AS":       TokenAs,
+	"OPTION":   TokenOption,
+	"ALL":      TokenAll,
+	"DISTINCT": TokenDistinct,
+}
+
+// lookupKeyword returns the keyword token type for ident, matched
+// case-insensitively, or TokenIdent when ident is not a keyword.
+func lookupKeyword(ident string) TokenType {
+	if tok, ok := keywords[strings.ToUpper(ident)]; ok {
+		return tok
+	}
+	return TokenIdent
+}
diff --git a/parser/parser.go b/parser/parser.go
index d5b78cb6..6c3a5179 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -6,6 +6,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"strings"
 
 	"github.com/kyleconroy/teesql/ast"
 )
@@ -22,9 +23,371 @@ func Parse(ctx context.Context, r io.Reader) (*ast.Script, error) {
 		return &ast.Script{}, nil
 	}
 
-	// TODO: Implement actual T-SQL parsing
-	// For now, this is a placeholder that returns an empty script
-	return &ast.Script{}, nil
+	p := newParser(string(data))
+	return p.parseScript()
+}
+
+// Parser holds the parsing state.
+type Parser struct {
+	lexer   *Lexer
+	curTok  Token // token currently being examined
+	peekTok Token // one-token lookahead, promoted to curTok by nextToken
+}
+
+func newParser(input string) *Parser {
+	p := &Parser{lexer: NewLexer(input)}
+	// Read two tokens so that both curTok and peekTok hold real tokens
+	p.nextToken()
+	p.nextToken()
+	return p
+}
+
+func (p *Parser) nextToken() {
+	p.curTok = p.peekTok
+	p.peekTok = p.lexer.NextToken()
+}
+
+func (p *Parser) parseScript() (*ast.Script, error) {
+	script := &ast.Script{}
+
+	// NOTE: parseBatch skips GO separators itself, so the whole script becomes at most one batch
+	batch, err := p.parseBatch()
+	if err != nil {
+		return nil, err
+	}
+	if batch != nil && len(batch.Statements) > 0 {
+		script.Batches = append(script.Batches, batch)
+	}
+
+	return script, nil
+}
+
+func (p *Parser) parseBatch() (*ast.Batch, error) {
+	batch := &ast.Batch{}
+
+	for p.curTok.Type != TokenEOF {
+		// Skip GO batch separators; statements on either side land in this same batch
+		if p.curTok.Type == TokenIdent && strings.EqualFold(p.curTok.Literal, "GO") {
+			p.nextToken()
+			continue
+		}
+
+		stmt, err := p.parseStatement()
+		if err != nil {
+			return nil, err
+		}
+		if stmt != nil {
+			batch.Statements = append(batch.Statements, stmt)
+		}
+	}
+
+	return batch, nil
+}
+
+func (p *Parser) parseStatement() (ast.Statement, error) {
+	switch p.curTok.Type {
+	case TokenSelect:
+		return p.parseSelectStatement()
+	case TokenSemicolon:
+		p.nextToken()
+		return nil, nil // empty statement: callers skip a nil Statement
+	default:
+		return nil, fmt.Errorf("unexpected token: %s", p.curTok.Literal)
+	}
+}
+
+func (p *Parser) parseSelectStatement() (*ast.SelectStatement, error) {
+	stmt := &ast.SelectStatement{}
+
+	// Parse query expression
+	qe, err := p.parseQueryExpression()
+	if err != nil {
+		return nil, err
+	}
+	stmt.QueryExpression = qe
+
+	// Parse optional OPTION clause
+	if p.curTok.Type == TokenOption {
+		hints, err := p.parseOptionClause()
+		if err != nil {
+			return nil, err
+		}
+		stmt.OptimizerHints = hints
+	}
+
+	// Skip optional semicolon
+	if p.curTok.Type == TokenSemicolon {
+		p.nextToken()
+	}
+
+	return stmt, nil
+}
+
+func (p *Parser) parseQueryExpression() (ast.QueryExpression, error) {
+	return p.parseQuerySpecification()
+}
+
+func (p *Parser) parseQuerySpecification() (*ast.QuerySpecification, error) {
+	qs := &ast.QuerySpecification{
+		UniqueRowFilter: "NotSpecified",
+	}
+
+	// Expect SELECT
+	if p.curTok.Type != TokenSelect {
+		return nil, fmt.Errorf("expected SELECT, got %s", p.curTok.Literal)
+	}
+	p.nextToken()
+
+	// Check for ALL or DISTINCT
+	if p.curTok.Type == TokenAll {
+		qs.UniqueRowFilter = "All"
+		p.nextToken()
+	} else if p.curTok.Type == TokenDistinct {
+		qs.UniqueRowFilter = "Distinct"
+		p.nextToken()
+	}
+
+	// Parse select elements
+	elements, err := p.parseSelectElements()
+	if err != nil {
+		return nil, err
+	}
+	qs.SelectElements = elements
+
+	// Parse optional FROM clause
+	if p.curTok.Type == TokenFrom {
+		fromClause, err := p.parseFromClause()
+		if err != nil {
+			return nil, err
+		}
+		qs.FromClause = fromClause
+	}
+
+	return qs, nil
+}
+
+func (p *Parser) parseSelectElements() ([]ast.SelectElement, error) {
+	var elements []ast.SelectElement
+
+	for {
+		elem, err := p.parseSelectElement()
+		if err != nil {
+			return nil, err
+		}
+		elements = append(elements, elem)
+
+		if p.curTok.Type != TokenComma {
+			break
+		}
+		p.nextToken() // consume comma
+	}
+
+	return elements, nil
+}
+
+func (p *Parser) parseSelectElement() (ast.SelectElement, error) {
+	// Check for * (an unqualified star only; "t.*" is not recognized here)
+	if p.curTok.Type == TokenStar {
+		p.nextToken()
+		return &ast.SelectStarExpression{}, nil
+	}
+
+	// Otherwise parse a scalar expression
+	expr, err := p.parseScalarExpression()
+	if err != nil {
+		return nil, err
+	}
+
+	return &ast.SelectScalarExpression{Expression: expr}, nil
+}
+
+func (p *Parser) parseScalarExpression() (ast.ScalarExpression, error) {
+	// For now, only handle column references and number literals
+	if p.curTok.Type == TokenIdent {
+		return p.parseColumnReference()
+	}
+	if p.curTok.Type == TokenNumber {
+		val := p.curTok.Literal // NOTE(review): lexer numbers may contain '.', yet are labelled Integer below
+		p.nextToken()
+		return &ast.IntegerLiteral{LiteralType: "Integer", Value: val}, nil
+	}
+	return nil, fmt.Errorf("unexpected token in expression: %s", p.curTok.Literal)
+}
+
+func (p *Parser) parseColumnReference() (*ast.ColumnReferenceExpression, error) {
+	var identifiers []*ast.Identifier
+
+	for {
+		if p.curTok.Type != TokenIdent {
+			break // NOTE(review): also reached after a trailing '.', which is accepted silently
+		}
+
+		id := &ast.Identifier{
+			Value:     p.curTok.Literal,
+			QuoteType: "NotQuoted", // NOTE(review): also used for [bracketed] tokens, whose Literal keeps the brackets
+		}
+		identifiers = append(identifiers, id)
+		p.nextToken()
+
+		if p.curTok.Type != TokenDot {
+			break
+		}
+		p.nextToken() // consume dot
+	}
+
+	return &ast.ColumnReferenceExpression{
+		ColumnType: "Regular",
+		MultiPartIdentifier: &ast.MultiPartIdentifier{
+			Count:       len(identifiers),
+			Identifiers: identifiers,
+		},
+	}, nil
+}
+
+func (p *Parser) parseFromClause() (*ast.FromClause, error) {
+	// Consume FROM
+	if p.curTok.Type != TokenFrom {
+		return nil, fmt.Errorf("expected FROM, got %s", p.curTok.Literal)
+	}
+	p.nextToken()
+
+	fc := &ast.FromClause{}
+
+	// Parse comma-separated table references
+	for {
+		ref, err := p.parseTableReference()
+		if err != nil {
+			return nil, err
+		}
+		fc.TableReferences = append(fc.TableReferences, ref)
+
+		if p.curTok.Type != TokenComma {
+			break
+		}
+		p.nextToken() // consume comma
+	}
+
+	return fc, nil
+}
+
+func (p *Parser) parseTableReference() (ast.TableReference, error) {
+	return p.parseNamedTableReference()
+}
+
+func (p *Parser) parseNamedTableReference() (*ast.NamedTableReference, error) {
+	ref := &ast.NamedTableReference{
+		ForPath: false,
+	}
+
+	// Parse schema object name (potentially multi-part: db.schema.table)
+	son, err := p.parseSchemaObjectName()
+	if err != nil {
+		return nil, err
+	}
+	ref.SchemaObject = son
+
+	// Parse optional alias (AS alias or just alias)
+	if p.curTok.Type == TokenAs {
+		p.nextToken()
+		if p.curTok.Type != TokenIdent {
+			return nil, fmt.Errorf("expected identifier after AS, got %s", p.curTok.Literal)
+		}
+		ref.Alias = &ast.Identifier{Value: p.curTok.Literal, QuoteType: "NotQuoted"}
+		p.nextToken()
+	} else if p.curTok.Type == TokenIdent {
+		// Could be
an alias without AS, but need to be careful not to consume keywords + upper := strings.ToUpper(p.curTok.Literal) + if upper != "WHERE" && upper != "GROUP" && upper != "HAVING" && upper != "ORDER" && upper != "OPTION" && upper != "GO" { + ref.Alias = &ast.Identifier{Value: p.curTok.Literal, QuoteType: "NotQuoted"} + p.nextToken() + } + } + + return ref, nil +} + +func (p *Parser) parseSchemaObjectName() (*ast.SchemaObjectName, error) { + var identifiers []*ast.Identifier + + for { + if p.curTok.Type != TokenIdent { + break + } + + id := &ast.Identifier{ + Value: p.curTok.Literal, + QuoteType: "NotQuoted", + } + identifiers = append(identifiers, id) + p.nextToken() + + if p.curTok.Type != TokenDot { + break + } + p.nextToken() // consume dot + } + + if len(identifiers) == 0 { + return nil, fmt.Errorf("expected identifier for schema object name") + } + + // BaseIdentifier is the last identifier + baseId := identifiers[len(identifiers)-1] + + return &ast.SchemaObjectName{ + BaseIdentifier: baseId, + Count: len(identifiers), + Identifiers: identifiers, + }, nil +} + +func (p *Parser) parseOptionClause() ([]*ast.OptimizerHint, error) { + // Consume OPTION + if p.curTok.Type != TokenOption { + return nil, fmt.Errorf("expected OPTION, got %s", p.curTok.Literal) + } + p.nextToken() + + // Consume ( + if p.curTok.Type != TokenLParen { + return nil, fmt.Errorf("expected (, got %s", p.curTok.Literal) + } + p.nextToken() + + var hints []*ast.OptimizerHint + + // Parse hints + for p.curTok.Type != TokenRParen && p.curTok.Type != TokenEOF { + if p.curTok.Type == TokenIdent { + hintKind := convertHintKind(p.curTok.Literal) + hints = append(hints, &ast.OptimizerHint{HintKind: hintKind}) + p.nextToken() + } else if p.curTok.Type == TokenComma { + p.nextToken() + } else { + p.nextToken() + } + } + + // Consume ) + if p.curTok.Type == TokenRParen { + p.nextToken() + } + + return hints, nil +} + +// convertHintKind converts hint identifiers to their canonical names +func 
convertHintKind(hint string) string { + // Map common hint names + hintMap := map[string]string{ + "IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX": "IgnoreNonClusteredColumnStoreIndex", + } + upper := strings.ToUpper(hint) + if mapped, ok := hintMap[upper]; ok { + return mapped + } + return hint } // jsonNode represents a generic JSON node from the AST JSON format. @@ -80,6 +443,23 @@ func selectStatementToJSON(s *ast.SelectStatement) jsonNode { if s.QueryExpression != nil { node["QueryExpression"] = queryExpressionToJSON(s.QueryExpression) } + if len(s.OptimizerHints) > 0 { + hints := make([]jsonNode, len(s.OptimizerHints)) + for i, h := range s.OptimizerHints { + hints[i] = optimizerHintToJSON(h) + } + node["OptimizerHints"] = hints + } + return node +} + +func optimizerHintToJSON(h *ast.OptimizerHint) jsonNode { + node := jsonNode{ + "$type": "OptimizerHint", + } + if h.HintKind != "" { + node["HintKind"] = h.HintKind + } return node } diff --git a/parser/testdata/OptimizerHintsTests110/metadata.json b/parser/testdata/OptimizerHintsTests110/metadata.json index 49e9182b..0967ef42 100644 --- a/parser/testdata/OptimizerHintsTests110/metadata.json +++ b/parser/testdata/OptimizerHintsTests110/metadata.json @@ -1 +1 @@ -{"skip": true} +{}