Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions internal/normalize/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,31 @@ var (
notLowerParenRegex = regexp.MustCompile(`\bnot\s*\((\d+)\)`)
isNotNullParenRegex = regexp.MustCompile(`\((\w+)\s+IS\s+NOT\s+NULL\)`)
isNullParenRegex = regexp.MustCompile(`\((\w+)\s+IS\s+NULL\)`)
// Alias AS normalization: remove optional AS keyword in alias contexts
// Matches: expr AS alias (where expr ends with word/digit/closing paren)
aliasAsRegex = regexp.MustCompile(`(\d+|\)|\w)\s+AS\s+(\w)`)
// ORDER BY single column parentheses normalization
// ORDER BY (col) -> ORDER BY col
orderBySingleParenRegex = regexp.MustCompile(`(?i)\bORDER BY\s+\((\w+)\)`)
// PRIMARY KEY single column parentheses normalization
// PRIMARY KEY (col) -> PRIMARY KEY col
primaryKeySingleParenRegex = regexp.MustCompile(`(?i)\bPRIMARY KEY\s+\((\w+)\)`)
// Parentheses around IN expressions: (x IN(...)) -> x IN(...)
// Handles both with and without space after IN
// Must be preceded by space or comma (not a function call like sum(x IN ...))
parenInExprRegex = regexp.MustCompile(`([\s,])\((\w+\s*IN\s*\([^)]*\))\)`)
// LIMIT syntax normalization: LIMIT offset, count -> LIMIT count OFFSET offset
limitCommaRegex = regexp.MustCompile(`(?i)\bLIMIT\s+(\d+)\s*,\s*(\d+)\b`)
// Spaces around dots in identifiers: system . one -> system.one
spaceDotSpaceRegex = regexp.MustCompile(`(\w)\s*\.\s*(\w)`)
// Trailing all-zero fraction in float literals: 1.0 -> 1, 1.00 -> 1
trailingDotZeroRegex = regexp.MustCompile(`\b(\d+)\.0+\b`)
// Add spaces around arithmetic operators: num/2 -> num / 2, 1+1 -> 1 + 1, 1+-a -> 1 + -a
// Match when operator is between word chars or ), or word and - (for unary minus)
arithmeticNoSpaceRegex = regexp.MustCompile(`([\w)])([/*%+])([\w-])`)
// Add spaces around binary minus: x-1 -> x - 1 (but not -1 which is unary)
// Match when ) or word is directly followed by - and then a word/digit
binaryMinusNoSpaceRegex = regexp.MustCompile(`([\w)])-([\w])`)
)

// DecodeHexEscapes decodes \xNN escape sequences in a string to raw bytes.
Expand Down Expand Up @@ -93,6 +118,53 @@ func EscapesInStrings(s string) string {
// Escaped backslash \\ -> single backslash \
result.WriteByte('\\')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 't' {
// Escaped tab \t -> actual tab
result.WriteByte('\t')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'n' {
// Escaped newline \n -> actual newline
result.WriteByte('\n')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'r' {
// Escaped carriage return \r -> actual carriage return
result.WriteByte('\r')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'a' {
// Escaped alert \a -> actual alert (bell)
result.WriteByte('\a')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'b' {
// Escaped backspace \b -> actual backspace
result.WriteByte('\b')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'f' {
// Escaped form feed \f -> actual form feed
result.WriteByte('\f')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == 'v' {
// Escaped vertical tab \v -> actual vertical tab
result.WriteByte('\v')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == '?' {
// Escaped question mark \? -> actual question mark
result.WriteByte('?')
i += 2
} else if ch == '\\' && i+1 < len(s) && s[i+1] == '"' {
// Escaped double quote \" -> actual double quote
result.WriteByte('"')
i += 2
} else if ch == '\\' && i+3 < len(s) && s[i+1] == 'x' {
// Hex escape \xNN -> decoded byte
hexStr := s[i+2 : i+4]
b, err := hex.DecodeString(hexStr)
if err == nil && len(b) == 1 {
result.WriteByte(b[0])
i += 4
} else {
result.WriteByte(ch)
i++
}
} else if ch == '\'' {
// Either end of string or escaped quote
result.WriteByte(ch)
Expand Down Expand Up @@ -191,6 +263,9 @@ func ForFormat(s string) string {
normalized = doubleQuotedIdentRegex.ReplaceAllString(normalized, "$1$2")
// Normalize AS keyword case: as -> AS
normalized = asKeywordRegex.ReplaceAllString(normalized, "AS")
// Remove optional AS keyword in alias contexts (1 AS x -> 1 x)
// This handles the equivalence of "expr AS alias" and "expr alias"
normalized = aliasAsRegex.ReplaceAllString(normalized, "$1 $2")
// Remove leading zeros from integer literals (077 -> 77)
normalized = leadingZerosRegex.ReplaceAllString(normalized, "$1")
// Normalize heredocs ($$...$$ -> '...')
Expand Down Expand Up @@ -225,6 +300,9 @@ func ForFormat(s string) string {
normalized = regexpOperatorRegex.ReplaceAllString(normalized, "match($1,$2)")
// Normalize ORDER BY () to ORDER BY tuple()
normalized = orderByEmptyRegex.ReplaceAllString(normalized, "ORDER BY tuple()")
// Remove parentheses around IN expressions BEFORE removing spaces
// (x IN (...)) -> x IN (...) - this must be done before spaceBeforeParenRegex
normalized = parenInExprRegex.ReplaceAllString(normalized, "$1$2")
// Normalize INSERT INTO table (cols) to have no space before ( (or consistent spacing)
// This matches "tablename (" and removes the space: "tablename("
normalized = spaceBeforeParenRegex.ReplaceAllString(normalized, "$1($2")
Expand All @@ -239,6 +317,20 @@ func ForFormat(s string) string {
// This handles both standalone (x IS NULL) and inside lambdas x -> (x IS NULL)
normalized = isNotNullParenRegex.ReplaceAllString(normalized, "$1 IS NOT NULL")
normalized = isNullParenRegex.ReplaceAllString(normalized, "$1 IS NULL")
// Normalize ORDER BY (col) to ORDER BY col
normalized = orderBySingleParenRegex.ReplaceAllString(normalized, "ORDER BY $1")
// Normalize PRIMARY KEY (col) to PRIMARY KEY col
normalized = primaryKeySingleParenRegex.ReplaceAllString(normalized, "PRIMARY KEY $1")
// Normalize LIMIT offset, count to LIMIT count OFFSET offset
normalized = limitCommaRegex.ReplaceAllString(normalized, "LIMIT $2 OFFSET $1")
// Normalize spaces around dots in identifiers: system . one -> system.one
normalized = spaceDotSpaceRegex.ReplaceAllString(normalized, "$1.$2")
// Normalize trailing .0 in float literals: 1.0 -> 1
normalized = trailingDotZeroRegex.ReplaceAllString(normalized, "$1")
// Add spaces around arithmetic operators (/, *, %, +): num/2 -> num / 2, 1+1 -> 1 + 1
normalized = arithmeticNoSpaceRegex.ReplaceAllString(normalized, "$1 $2 $3")
// Add spaces around binary minus: x-1 -> x - 1
normalized = binaryMinusNoSpaceRegex.ReplaceAllString(normalized, "$1 - $2")
// Re-normalize whitespace after replacements
normalized = Whitespace(normalized)
// Strip trailing semicolon and any spaces before it
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00066_group_by_in/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00113_shard_group_array/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00118_storage_join/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00120_join_and_group_by/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00192_least_greatest/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00218_like_regexp_newline/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00225_join_duplicate_columns/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00231_format_vertical_raw/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00251_has_types/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00298_enum_width_and_cast/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00319_index_for_like/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00353_join_by_tuple/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00374_any_last_if_merge/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00529_orantius/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00562_in_subquery_merge_tree/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00647_histogram/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00700_decimal_gathers/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00712_prewhere_with_sampling/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00715_bounding_ratio/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00717_default_join_type/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00722_inner_join/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00732_base64_functions/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00762_date_comparsion/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00780_unaligned_array_join/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00818_alias_bug_4110/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00927_asof_join_correct_bt/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00979_toFloat_monotonicity/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/00990_request_splitting/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01012_select_limit_x_0/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01065_if_not_finite/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01080_join_get_null/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01100_split_by_string/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01118_is_constant/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01137_sample_final/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01139_asof_join_types/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01234_to_string_monotonic/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01428_hash_set_nan_key/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01429_join_on_error_messages/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01440_big_int_exotic_casts/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01440_big_int_shift/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01457_int256_hashing/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01459_decimal_casts/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01474_decimal_scale_bug/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01479_cross_join_9855/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01486_json_array_output/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01550_create_map_type/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01558_ttest/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
2 changes: 1 addition & 1 deletion parser/testdata/01596_full_join_chertus/metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"todo_format":true}
{}
Loading