Skip to content

Commit f69c739

Browse files
SQL: Fix FORMAT function to better comply with Microsoft SQL Server specification (elastic#86225)
1 parent fd99a50 commit f69c739

File tree

8 files changed

+303
-41
lines changed

8 files changed

+303
-41
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,6 @@ indent_size = 2
225225

226226
[*.{xsd,xml}]
227227
indent_size = 4
228+
229+
[*.{csv,sql}-spec]
230+
trim_trailing_whitespace = false

docs/changelog/86225.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 86225
2+
summary: Fix FORMAT function to comply with Microsoft SQL Server specification
3+
area: SQL
4+
type: bug
5+
issues:
6+
- 66560

docs/reference/sql/functions/date-time.asciidoc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[dateParse1]
454454

455455
[NOTE]
456456
====
457-
The resulting `date` will have the time zone specified by the user through the
457+
The resulting `date` will have the time zone specified by the user through the
458458
<<sql-search-api-time-zone,`time_zone`>>/<<jdbc-cfg-timezone,`timezone`>> REST/driver parameters
459459
with no conversion applied.
460460
@@ -810,7 +810,7 @@ SQL Server Format Specification].
810810

811811
[NOTE]
812812
If the 1st argument is of type `time`, then the pattern specified by the 2nd argument cannot contain date-related units
813-
(e.g. 'dd', 'MM', 'YYYY', etc.). If it contains such units an error is returned. +
813+
(e.g. 'dd', 'MM', 'yyyy', etc.). If it contains such units an error is returned. +
814814
Format specifier `F` works similarly to format specifier `f`.
815815
It returns the fractional part of the seconds, and the number of digits will be the same as the number of `F`s provided as input (up to 9 digits).
816816
The result is padded with trailing `0`s to match the number of `F`s provided.
@@ -862,9 +862,9 @@ Patterns for Date/Time Formatting].
862862
If the 1st argument is of type `time`, then the pattern specified by the 2nd argument cannot contain date related units
863863
(e.g. 'dd', 'MM', 'YYYY', etc.). If it contains such units an error is returned. +
864864
The result of the patterns `TZ` and `tz` (time zone abbreviations) in some cases differs from the results returned by the `TO_CHAR`
865-
in PostgreSQL. The reason is that the time zone abbreviations specified by the JDK are different from the ones specified by PostgreSQL.
866-
This function might show an actual time zone abbreviation instead of the generic `LMT` or empty string or offset returned by the PostgreSQL
867-
implementation. The summer/daylight markers might also differ between the two implementations (e.g. will show `HT` instead of `HST`
865+
in PostgreSQL. The reason is that the time zone abbreviations specified by the JDK are different from the ones specified by PostgreSQL.
866+
This function might show an actual time zone abbreviation instead of the generic `LMT` or empty string or offset returned by the PostgreSQL
867+
implementation. The summer/daylight markers might also differ between the two implementations (e.g. will show `HT` instead of `HST`
868868
for Hawaii). +
869869
The `FX`, `TM`, `SP` pattern modifiers are not supported and will show up as `FX`, `TM`, `SP` literals in the output.
870870

x-pack/plugin/sql/qa/server/src/main/resources/date.csv-spec

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,3 +404,46 @@ SELECT emp_no FROM test_emp WHERE DATE_ADD('day', 1, hire_date) = '2021-02-03||-
404404
10044
405405
10085
406406
;
407+
408+
409+
410+
// format
411+
412+
formatNormalPattern
413+
SELECT FORMAT(birth_date, 'dd/MM/yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
414+
x
415+
----------
416+
02/09/1953
417+
;
418+
419+
formatWithDoubleQuoteEscaping
420+
SELECT FORMAT(birth_date, '"yyyy" yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
421+
422+
x
423+
------
424+
yyyy 1953
425+
;
426+
427+
formatSingleQuote
428+
SELECT FORMAT(birth_date, '"''" yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
429+
430+
x
431+
------
432+
' 1953
433+
;
434+
435+
formatQuotesAndAllowedCharacters
436+
SELECT FORMAT(birth_date, 'abc ''yyy'' yyyy') as x FROM test_emp ORDER BY emp_no LIMIT 1;
437+
438+
x
439+
------
440+
abc yyy 1953
441+
;
442+
443+
formatQuotesComplexString
444+
SELECT FORMAT(birth_date, '\t\hi\s i\s \t\h\e \y\ear yyyy an\d \t\h\e \mon\t\h MM') as x FROM test_emp ORDER BY emp_no LIMIT 1;
445+
446+
x
447+
------------------------------------
448+
this is the year 1953 and the month 09
449+
;

x-pack/plugin/sql/qa/server/src/main/resources/datetime.csv-spec

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,8 +1173,8 @@ M | 1996-11-05 00:00:00.000Z
11731173

11741174
selectFormat
11751175
schema::format_date:s|format_datetime:s|format_time:s
1176-
SELECT FORMAT('2020-04-05T11:22:33.123Z'::date, 'dd/MM/YYYY HH:mm:ss.fff') AS format_date,
1177-
FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff') AS format_datetime,
1176+
SELECT FORMAT('2020-04-05T11:22:33.123Z'::date, 'dd/MM/yyyy HH:mm:ss.fff') AS format_date,
1177+
FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/yyyy HH:mm:ss.ff') AS format_datetime,
11781178
FORMAT('11:22:33.123456789Z'::time, 'HH:mm:ss.ff') AS format_time;
11791179

11801180
format_date | format_datetime | format_time
@@ -1184,8 +1184,8 @@ FORMAT('11:22:33.123456789Z'::time, 'HH:mm:ss.ff') AS format_time;
11841184

11851185
selectFormatWithLength
11861186
schema::format_datetime:s|length:i
1187-
SELECT FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff') AS format_datetime,
1188-
LENGTH(FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff')) AS length;
1187+
SELECT FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/yyyy HH:mm:ss.ff') AS format_datetime,
1188+
LENGTH(FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/yyyy HH:mm:ss.ff')) AS length;
11891189

11901190
format_datetime | length
11911191
------------------------+----------------
@@ -1194,7 +1194,7 @@ LENGTH(FORMAT('2020-04-05T11:22:33.123Z'::datetime, 'dd/MM/YYYY HH:mm:ss.ff')) A
11941194

11951195
selectFormatWithField
11961196
schema::birth_date:ts|format_birth_date1:s|format_birth_date2:s|emp_no:i
1197-
SELECT birth_date, FORMAT(birth_date, 'MM/dd/YYYY') AS format_birth_date1, FORMAT(birth_date, concat(gender, 'M/dd')) AS format_birth_date2, emp_no
1197+
SELECT birth_date, FORMAT(birth_date, 'MM/dd/yyyy') AS format_birth_date1, FORMAT(birth_date, concat(gender, 'M/dd')) AS format_birth_date2, emp_no
11981198
FROM test_emp WHERE gender = 'M' AND emp_no BETWEEN 10037 AND 10052 ORDER BY emp_no;
11991199

12001200
birth_date | format_birth_date1 | format_birth_date2 | emp_no
@@ -1233,7 +1233,7 @@ WHERE FORMAT(birth_date, 'MM')::integer > 10 ORDER BY emp_no LIMIT 10;
12331233

12341234
formatOrderBy
12351235
schema::birth_date:ts|format_birth_date:s
1236-
SELECT birth_date, FORMAT(birth_date, 'MM/dd/YYYY') AS format_birth_date FROM test_emp ORDER BY 2 DESC NULLS LAST LIMIT 10;
1236+
SELECT birth_date, FORMAT(birth_date, 'MM/dd/yyyy') AS format_birth_date FROM test_emp ORDER BY 2 DESC NULLS LAST LIMIT 10;
12371237

12381238
birth_date | format_birth_date
12391239
-------------------------+---------------

x-pack/plugin/sql/qa/server/src/main/resources/docs/docs.csv-spec

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3190,7 +3190,7 @@ SELECT DATE_TRUNC('days', INTERVAL '19 15:24:19' DAY TO SECONDS) AS day;
31903190

31913191
formatDate
31923192
// tag::formatDate
3193-
SELECT FORMAT(CAST('2020-04-05' AS DATE), 'dd/MM/YYYY') AS "date";
3193+
SELECT FORMAT(CAST('2020-04-05' AS DATE), 'dd/MM/yyyy') AS "date";
31943194

31953195
date
31963196
------------------
@@ -3200,7 +3200,7 @@ SELECT FORMAT(CAST('2020-04-05' AS DATE), 'dd/MM/YYYY') AS "date";
32003200

32013201
formatDateTime
32023202
// tag::formatDateTime
3203-
SELECT FORMAT(CAST('2020-04-05T11:22:33.987654' AS DATETIME), 'dd/MM/YYYY HH:mm:ss.ff') AS "datetime";
3203+
SELECT FORMAT(CAST('2020-04-05T11:22:33.987654' AS DATETIME), 'dd/MM/yyyy HH:mm:ss.ff') AS "datetime";
32043204

32053205
datetime
32063206
------------------

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/DateTimeFormatProcessor.java

Lines changed: 127 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,50 @@
2020
import java.time.temporal.TemporalAccessor;
2121
import java.util.Locale;
2222
import java.util.Objects;
23+
import java.util.Set;
2324
import java.util.function.Function;
2425

2526
import static org.elasticsearch.xpack.sql.util.DateUtils.asTimeAtZone;
2627

2728
public class DateTimeFormatProcessor extends BinaryDateTimeProcessor {
2829

2930
public static final String NAME = "dtformat";
30-
private static final String[][] JAVA_TIME_FORMAT_REPLACEMENTS = {
31+
32+
/**
33+
* these characters have a meaning in MS date patterns.
34+
* If a character is not in this set, then it's still allowed in MS FORMAT patterns
35+
* but not in Java, so it has to be translated or quoted
36+
*/
37+
private static final Set<Character> MS_DATETIME_PATTERN_CHARS = Set.of(
38+
'd',
39+
'f',
40+
'F',
41+
'g',
42+
'h',
43+
'H',
44+
'K',
45+
'm',
46+
'M',
47+
's',
48+
't',
49+
'y',
50+
'z',
51+
':',
52+
'/',
53+
' ',
54+
'-'
55+
);
56+
57+
/**
58+
* characters that start a quoting block in MS patterns
59+
*/
60+
private static final Set<Character> MS_QUOTING_CHARS = Set.of('\\', '\'', '"');
61+
62+
/**
63+
* list of MS datetime patterns with the corresponding translation in Java DateTimeFormat
64+
* (patterns that are the same in Java and in MS are not listed here)
65+
*/
66+
private static final String[][] MS_TO_JAVA_PATTERNS = {
3167
{ "tt", "a" },
3268
{ "t", "a" },
3369
{ "dddd", "eeee" },
@@ -47,10 +83,7 @@ protected Function<TemporalAccessor, String> formatterFor(String pattern) {
4783
if (pattern.isEmpty()) {
4884
return null;
4985
}
50-
for (String[] replacement : JAVA_TIME_FORMAT_REPLACEMENTS) {
51-
pattern = pattern.replace(replacement[0], replacement[1]);
52-
}
53-
final String javaPattern = pattern;
86+
final String javaPattern = msToJavaPattern(pattern);
5487
return DateTimeFormatter.ofPattern(javaPattern, Locale.ROOT)::format;
5588
}
5689
},
@@ -67,6 +100,95 @@ protected Function<TemporalAccessor, String> formatterFor(String pattern) {
67100
}
68101
};
69102

103+
protected static String msToJavaPattern(String pattern) {
104+
StringBuilder result = new StringBuilder(pattern.length());
105+
StringBuilder partialQuotedString = new StringBuilder();
106+
107+
boolean originalCharacterQuoted = false;
108+
boolean lastTargetCharacterQuoted = false;
109+
char quotingChar = '\\';
110+
111+
for (int i = 0; i < pattern.length(); i++) {
112+
char c = pattern.charAt(i);
113+
if (originalCharacterQuoted) {
114+
if (quotingChar == '\\') {
115+
// in the original pattern, this is a single quoted character, add it to the partial string
116+
// that will be quoted in Java
117+
originalCharacterQuoted = false;
118+
lastTargetCharacterQuoted = true;
119+
partialQuotedString.append(c);
120+
} else if (c == quotingChar) {
121+
// the original pattern is closing the quoting,
122+
// do nothing for now, next character could open a new quoting block
123+
originalCharacterQuoted = false;
124+
} else {
125+
// any character that is not a quoting char is just added to the partial quoting string
126+
// because there could be more characters to quote after that
127+
partialQuotedString.append(c);
128+
}
129+
} else {
130+
boolean characterProcessed = false;
131+
// the original pattern is not quoting
132+
if (MS_QUOTING_CHARS.contains(c)) {
133+
// next character(s) is quoted, start a quoted block on the target
134+
originalCharacterQuoted = true;
135+
lastTargetCharacterQuoted = true;
136+
quotingChar = c;
137+
characterProcessed = true;
138+
} else {
139+
// manage patterns that are different from MS to Java and have to be translated
140+
for (String[] item : MS_TO_JAVA_PATTERNS) {
141+
int fragmentLength = item[0].length();
142+
if (i + fragmentLength <= pattern.length() && item[0].equals(pattern.substring(i, i + fragmentLength))) {
143+
if (lastTargetCharacterQuoted) {
144+
// now origin is not quoting for sure and the next block is a valid datetime pattern,
145+
// that has to be translated and written as is (not quoted).
146+
// Before doing this, let's flush the previously quoted string
147+
// and quote it properly with Java syntax
148+
lastTargetCharacterQuoted = false;
149+
quoteAndAppend(result, partialQuotedString);
150+
partialQuotedString = new StringBuilder();
151+
}
152+
// and then translate the pattern
153+
result.append(item[1]);
154+
characterProcessed = true;
155+
i += (fragmentLength - 1); // fast-forward, because the replaced pattern could be longer than one character
156+
break;
157+
}
158+
}
159+
}
160+
if (characterProcessed == false) {
161+
if (MS_DATETIME_PATTERN_CHARS.contains(c) == false) {
162+
// this character is allowed in MS, but not in Java, so it has to be quoted in the result
163+
lastTargetCharacterQuoted = true;
164+
partialQuotedString.append(c);
165+
} else {
166+
// any other character is a valid datetime pattern in both Java and MS
167+
if (lastTargetCharacterQuoted) {
168+
// flush the quoted string first, if any
169+
lastTargetCharacterQuoted = false;
170+
quoteAndAppend(result, partialQuotedString);
171+
partialQuotedString = new StringBuilder();
172+
}
173+
// and then add the character itself, as it is
174+
result.append(c);
175+
}
176+
}
177+
}
178+
}
179+
// if the original pattern ended with a quoted block, flush it to the result and quote it in Java
180+
if (lastTargetCharacterQuoted) {
181+
quoteAndAppend(result, partialQuotedString);
182+
}
183+
return result.toString();
184+
}
185+
186+
private static void quoteAndAppend(StringBuilder mainBuffer, StringBuilder fragmentToQuote) {
187+
mainBuffer.append("'");
188+
mainBuffer.append(fragmentToQuote.toString().replaceAll("'", "''"));
189+
mainBuffer.append("'");
190+
}
191+
70192
protected abstract Function<TemporalAccessor, String> formatterFor(String pattern);
71193

72194
public Object format(Object timestamp, Object pattern, ZoneId zoneId) {

0 commit comments

Comments
 (0)