diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala index a87482e88139f..039d33a53bf49 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala @@ -160,6 +160,16 @@ private[sql] object AnyTimestampType extends AbstractDataType with Serializable override private[sql] def simpleString = "(timestamp or timestamp without time zone)" } +private[sql] object AnyTimestampNanoType extends AbstractDataType with Serializable { + override private[sql] def defaultConcreteType: DataType = TimestampNTZNanosType() + + override private[sql] def acceptsType(other: DataType): Boolean = + other.isInstanceOf[TimestampLTZNanosType] || other.isInstanceOf[TimestampNTZNanosType] + + override private[sql] def simpleString = + "(timestamp_ltz(p) or timestamp_ntz(p) with p in [7, 9])" +} + private[sql] abstract class DatetimeType extends AtomicType /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiGetDateFieldOperationsTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiGetDateFieldOperationsTypeCoercion.scala index 7be18d0e44e08..255dde8384691 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiGetDateFieldOperationsTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiGetDateFieldOperationsTypeCoercion.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, GetDateField} -import org.apache.spark.sql.types.{AnyTimestampTypeExpression, DateType} +import org.apache.spark.sql.types.{AnyTimestampNanoTypeExpression, AnyTimestampTypeExpression, DateType} /** * ANSI type coercion helper that matches against [[GetDateField]] expressions in order to type @@ 
-26,7 +26,9 @@ import org.apache.spark.sql.types.{AnyTimestampTypeExpression, DateType} */ object AnsiGetDateFieldOperationsTypeCoercion { def apply(expression: Expression): Expression = expression match { - case g: GetDateField if AnyTimestampTypeExpression.unapply(g.child) => + case g: GetDateField + if AnyTimestampTypeExpression.unapply(g.child) || + AnyTimestampNanoTypeExpression.unapply(g.child) => g.withNewChildren(Seq(Cast(g.child, DateType))) case other => other diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 922d45c4654cb..f7b019a874553 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -544,15 +544,21 @@ trait GetDateField extends UnaryExpression with ImplicitCastInputTypes { } } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the day of year of the date/timestamp.", examples = """ Examples: > SELECT _FUNC_('2016-04-09'); 100 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2016-04-09 12:00:00.123456789'); + 100 """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.contains.tab case class DayOfYear(child: Expression) extends GetDateField { override val func = DateTimeUtils.getDayInYear override val funcName = "getDayInYear" @@ -848,15 +854,21 @@ case class UnixMicros(child: Expression) extends TimestampToLongBase { copy(child = newChild) } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the year component of the date/timestamp.", examples = """ Examples: > SELECT _FUNC_('2016-07-30'); 2016 + > SET spark.sql.timestampNanosTypes.enabled=true; + 
spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2016-07-30 12:00:00.123456789'); + 2016 """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.contains.tab case class Year(child: Expression) extends GetDateField { override val func = DateTimeUtils.getYear override val funcName = "getYear" @@ -871,15 +883,21 @@ case class YearOfWeek(child: Expression) extends GetDateField { copy(child = newChild) } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the quarter of the year for date, in the range 1 to 4.", examples = """ Examples: > SELECT _FUNC_('2016-08-31'); 3 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2016-08-31 12:00:00.123456789'); + 3 """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.contains.tab case class Quarter(child: Expression) extends GetDateField { override val func = DateTimeUtils.getQuarter override val funcName = "getQuarter" @@ -887,30 +905,42 @@ case class Quarter(child: Expression) extends GetDateField { copy(child = newChild) } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the month component of the date/timestamp.", examples = """ Examples: > SELECT _FUNC_('2016-07-30'); 7 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2016-07-30 12:00:00.123456789'); + 7 """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.contains.tab case class Month(child: Expression) extends GetDateField { override val func = DateTimeUtils.getMonth override val funcName = "getMonth" override protected def withNewChildInternal(newChild: Expression): Month = copy(child = newChild) } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the day of month of the date/timestamp.", examples = """ Examples: 
> SELECT _FUNC_('2009-07-30'); 30 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2009-07-30 12:00:00.123456789'); + 30 """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.contains.tab case class DayOfMonth(child: Expression) extends GetDateField { override val func = DateTimeUtils.getDayOfMonth override val funcName = "getDayOfMonth" @@ -918,17 +948,21 @@ case class DayOfMonth(child: Expression) extends GetDateField { copy(child = newChild) } -// scalastyle:off line.size.limit +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, ..., 7 = Saturday).", examples = """ Examples: > SELECT _FUNC_('2009-07-30'); 5 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2009-07-30 12:00:00.123456789'); + 5 """, group = "datetime_funcs", since = "2.3.0") -// scalastyle:on line.size.limit +// scalastyle:on line.size.limit line.contains.tab case class DayOfWeek(child: Expression) extends GetDateField { override val func = DateTimeUtils.getDayOfWeek override val funcName = "getDayOfWeek" @@ -936,17 +970,21 @@ case class DayOfWeek(child: Expression) extends GetDateField { copy(child = newChild) } -// scalastyle:off line.size.limit +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).", examples = """ Examples: > SELECT _FUNC_('2009-07-30'); 3 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2009-07-30 12:00:00.123456789'); + 3 """, group = "datetime_funcs", since = "2.4.0") -// scalastyle:on line.size.limit +// scalastyle:on line.size.limit line.contains.tab case class 
WeekDay(child: Expression) extends GetDateField { override val func = DateTimeUtils.getWeekDay override val funcName = "getWeekDay" @@ -954,17 +992,21 @@ case class WeekDay(child: Expression) extends GetDateField { copy(child = newChild) } -// scalastyle:off line.size.limit +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.", examples = """ Examples: > SELECT _FUNC_('2008-02-20'); 8 + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2008-02-20 12:00:00.123456789'); + 8 """, group = "datetime_funcs", since = "1.5.0") -// scalastyle:on line.size.limit +// scalastyle:on line.size.limit line.contains.tab case class WeekOfYear(child: Expression) extends GetDateField { override val func = DateTimeUtils.getWeekOfYear override val funcName = "getWeekOfYear" @@ -972,15 +1014,21 @@ case class WeekOfYear(child: Expression) extends GetDateField { copy(child = newChild) } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = "_FUNC_(date) - Returns the three-letter abbreviated month name from the given date.", examples = """ Examples: > SELECT _FUNC_('2008-02-20'); Feb + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2008-02-20 12:00:00.123456789'); + Feb """, group = "datetime_funcs", since = "4.0.0") +// scalastyle:on line.contains.tab case class MonthName(child: Expression) extends GetDateField with DefaultStringProducingExpression { override val func = DateTimeUtils.getMonthName override val funcName = "getMonthName" @@ -988,15 +1036,21 @@ case class MonthName(child: Expression) extends GetDateField with DefaultStringP copy(child = newChild) } +// scalastyle:off line.contains.tab @ExpressionDescription( usage = 
"_FUNC_(date) - Returns the three-letter abbreviated day name from the given date.", examples = """ Examples: > SELECT _FUNC_(DATE('2008-02-20')); Wed + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(TIMESTAMP_NTZ '2008-02-20 12:00:00.123456789'); + Wed """, group = "datetime_funcs", since = "4.0.0") +// scalastyle:on line.contains.tab case class DayName(child: Expression) extends GetDateField with DefaultStringProducingExpression { override val func = DateTimeUtils.getDayName override val funcName = "getDayName" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala index fd942ba60de4b..a7f50c0663fa1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala @@ -82,6 +82,11 @@ private[sql] object AnyTimestampTypeExpression { e.dataType.isInstanceOf[TimestampType] || e.dataType.isInstanceOf[TimestampNTZType] } +private[sql] object AnyTimestampNanoTypeExpression { + def unapply(e: Expression): Boolean = + e.dataType.isInstanceOf[TimestampLTZNanosType] || e.dataType.isInstanceOf[TimestampNTZNanosType] +} + private[sql] object DecimalExpression { def unapply(e: Expression): Option[(Int, Int)] = e.dataType match { case t: DecimalType => Some((t.precision, t.scale)) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index 8b5351dd80678..f9bbaf5034faa 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -307,6 +307,193 @@ Project [extract(SECOND, 2019-12-31 23:54:35.123456789) AS extract(SECOND FROM T +- 
OneRowRelation +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2020-02-29 23:59:59.999999999') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '1900-02-28 12:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, 
extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2016-01-01 06:30:00.123456789') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2020-12-31 23:59:59.999999999') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '1960-07-15 06:07:08.123456789') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, 
weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '0001-01-01 00:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000001' :: timestamp_ltz(7)) AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.999999999' :: timestamp_ltz(8)) AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000000' :: timestamp_ltz(9)) AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + 
+- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 04:00:00.123456789 UTC') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 04:00:00.123456789') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + (TIMESTAMP_LTZ '2020-03-01 06:00:00.123456789 Asia/Kolkata') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT extract(YEAR FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(YEAR, 2020-02-29 12:00:00.123456789) AS extract(YEAR FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(MONTH FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(MONTH, 2020-02-29 12:00:00.123456789) AS extract(MONTH FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(DAY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(DAY, 2020-02-29 12:00:00.123456789) AS extract(DAY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(DOY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(DOY, 2020-02-29 12:00:00.123456789) AS extract(DOY FROM TIMESTAMP_LTZ '2020-02-29 
12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(WEEK FROM TIMESTAMP_LTZ '2021-01-01 12:00:00.123456789') +-- !query analysis +Project [extract(WEEK, 2021-01-01 12:00:00.123456789) AS extract(WEEK FROM TIMESTAMP_LTZ '2021-01-01 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT date_part('QUARTER', TIMESTAMP_LTZ '2020-04-01 00:00:00.000000001') +-- !query analysis +Project [date_part(QUARTER, 2020-04-01 00:00:00.000000001) AS date_part(QUARTER, TIMESTAMP_LTZ '2020-04-01 00:00:00.000000001')#x] ++- OneRowRelation + + +-- !query +SELECT date_part('DOW', TIMESTAMP_LTZ '2020-02-29 00:00:00.000000001') +-- !query analysis +Project [date_part(DOW, 2020-02-29 00:00:00.000000001) AS date_part(DOW, TIMESTAMP_LTZ '2020-02-29 00:00:00.000000001')#x] ++- OneRowRelation + + +-- !query +SELECT date_part('YEAROFWEEK', TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001') +-- !query analysis +Project [date_part(YEAROFWEEK, 2021-01-01 00:00:00.000000001) AS date_part(YEAROFWEEK, TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001')#x] ++- OneRowRelation + + +-- !query +SELECT year(NULL :: timestamp_ltz(9)), month(NULL :: timestamp_ltz(9)) +-- !query analysis +Project [year(cast(cast(null as timestamp_ltz(9)) as date)) AS year(CAST(NULL AS TIMESTAMP_LTZ(9)))#x, month(cast(cast(null as timestamp_ltz(9)) as date)) AS month(CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + -- !query SELECT DATE '2020-01-01'::timestamp_ltz(9) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out index 9e31702d30938..ee11cf91b897f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out @@ -251,6 +251,186 @@ Project [extract(SECOND, 1960-01-01 13:24:35.123456789) AS extract(SECOND FROM T +- 
OneRowRelation +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-02-29 23:59:59.999999999') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '1900-02-28 12:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, 
extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2016-01-01 06:30:00.123456789') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-03-31 13:24:35.123456789') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, 
weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-12-31 23:59:59.999999999') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '1960-07-15 06:07:08.123456789') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '0001-01-01 00:00:00.000000001') AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, quarter(cast(v#x as date)) AS quarter(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, 
dayofyear(cast(v#x as date)) AS dayofyear(v)#x, dayofweek(cast(v#x as date)) AS dayofweek(v)#x, weekday(cast(v#x as date)) AS weekday(v)#x, weekofyear(cast(v#x as date)) AS weekofyear(v)#x, extract(YEAROFWEEK, v#x) AS extract(YEAROFWEEK FROM v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000001' :: timestamp_ntz(7)) AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.999999999' :: timestamp_ntz(8)) AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000000' :: timestamp_ntz(9)) AS t(v) +-- !query analysis +Project [year(cast(v#x as date)) AS year(v)#x, month(cast(v#x as date)) AS month(v)#x, day(cast(v#x as date)) AS day(v)#x, dayofyear(cast(v#x as date)) AS dayofyear(v)#x] ++- SubqueryAlias t + +- LocalRelation [v#x] + + +-- !query +SELECT extract(YEAR FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(YEAR, 2020-02-29 12:00:00.123456789) AS extract(YEAR FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(MONTH FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(MONTH, 2020-02-29 12:00:00.123456789) AS extract(MONTH FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(DAY FROM TIMESTAMP_NTZ '2020-02-29 
12:00:00.123456789') +-- !query analysis +Project [extract(DAY, 2020-02-29 12:00:00.123456789) AS extract(DAY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(DOY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query analysis +Project [extract(DOY, 2020-02-29 12:00:00.123456789) AS extract(DOY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT extract(WEEK FROM TIMESTAMP_NTZ '2021-01-01 12:00:00.123456789') +-- !query analysis +Project [extract(WEEK, 2021-01-01 12:00:00.123456789) AS extract(WEEK FROM TIMESTAMP_NTZ '2021-01-01 12:00:00.123456789')#x] ++- OneRowRelation + + +-- !query +SELECT date_part('QUARTER', TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001') +-- !query analysis +Project [date_part(QUARTER, 2020-04-01 00:00:00.000000001) AS date_part(QUARTER, TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001')#x] ++- OneRowRelation + + +-- !query +SELECT date_part('DOW', TIMESTAMP_NTZ '2020-02-29 00:00:00.000000001') +-- !query analysis +Project [date_part(DOW, 2020-02-29 00:00:00.000000001) AS date_part(DOW, TIMESTAMP_NTZ '2020-02-29 00:00:00.000000001')#x] ++- OneRowRelation + + +-- !query +SELECT date_part('YEAROFWEEK', TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001') +-- !query analysis +Project [date_part(YEAROFWEEK, 2021-01-01 00:00:00.000000001) AS date_part(YEAROFWEEK, TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001')#x] ++- OneRowRelation + + +-- !query +SELECT year(NULL :: timestamp_ntz(9)), month(NULL :: timestamp_ntz(9)) +-- !query analysis +Project [year(cast(cast(null as timestamp_ntz(9)) as date)) AS year(CAST(NULL AS TIMESTAMP_NTZ(9)))#x, month(cast(cast(null as timestamp_ntz(9)) as date)) AS month(CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + -- !query SELECT DATE '2020-01-01'::timestamp_ntz(9) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql 
b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index f2cff02b94d57..354d491c955c4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -80,6 +80,66 @@ SELECT extract(SECOND FROM TIMESTAMP_LTZ '1960-01-01 13:24:35.123456789'); SELECT extract(MINUTE FROM TIMESTAMP_LTZ '2020-01-01 13:24:35.123456789 Asia/Kolkata'); SELECT extract(SECOND FROM TIMESTAMP_LTZ '2020-01-01 13:24:35.123456789 Asia/Kolkata'); +-- Date field functions over nanosecond-precision values (SPARK-57469). Date fields depend only +-- on the calendar date, so the precision, time-of-day and sub-microsecond digits never affect the +-- result; LTZ casts to DATE in the session time zone, so a zone shift can move the calendar day. +-- Columns are year, quarter, month, day, dayofyear, dayofweek (1=Sun..7=Sat), +-- weekday (0=Mon..6=Sun), weekofyear (ISO), yearofweek (ISO). +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2020-02-29 23:59:59.999999999') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '1900-02-28 12:00:00.000000001') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2016-01-01 06:30:00.123456789') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2020-12-31 23:59:59.999999999') AS t(v); +-- Pre-epoch 
and far-past dates exercise the negative-epoch / minimum-date path. +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '1960-07-15 06:07:08.123456789') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '0001-01-01 00:00:00.000000001') AS t(v); + +-- Precision (7/8/9) and fraction invariance: the same local date read at different precisions and +-- fractions yields identical date fields. +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000001' :: timestamp_ltz(7)) AS t(v); +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.999999999' :: timestamp_ltz(8)) AS t(v); +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000000' :: timestamp_ltz(9)) AS t(v); + +-- Time-zone-driven date shifts. An early-hours UTC instant rolls back a day in the session zone +-- (America/Los_Angeles, UTC-08:00), here crossing the year boundary to 2019-12-31. +SELECT year(v), month(v), day(v) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 04:00:00.123456789 UTC') AS t(v); +SELECT year(v), month(v), day(v) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 04:00:00.123456789') AS t(v); +-- A sub-hour-offset source zone (Asia/Kolkata, UTC+05:30) near the leap-day boundary. +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + (TIMESTAMP_LTZ '2020-03-01 06:00:00.123456789 Asia/Kolkata') AS t(v); + +-- EXTRACT / date_part date components (rewrite transitively to the same functions). 
+SELECT extract(YEAR FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(MONTH FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(DAY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(DOY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(WEEK FROM TIMESTAMP_LTZ '2021-01-01 12:00:00.123456789'); +SELECT date_part('QUARTER', TIMESTAMP_LTZ '2020-04-01 00:00:00.000000001'); +SELECT date_part('DOW', TIMESTAMP_LTZ '2020-02-29 00:00:00.000000001'); +SELECT date_part('YEAROFWEEK', TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001'); + +-- NULL nanosecond timestamp. +SELECT year(NULL :: timestamp_ltz(9)), month(NULL :: timestamp_ltz(9)); + -- DATE <-> TIMESTAMP_LTZ(p) casts (SPARK-57323): midnight in the session zone / date extraction. -- Nanosecond typed literals derive precision from the fractional digits (SPARK-57250). SELECT DATE '2020-01-01'::timestamp_ltz(9); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql index a6d55ca3fc800..847a6d3607e12 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -66,6 +66,62 @@ SELECT extract(SECOND FROM NULL :: timestamp_ntz(9)); -- Pre-epoch nanosecond values exercise the negative-epoch path. SELECT extract(SECOND FROM TIMESTAMP_NTZ '1960-01-01 13:24:35.123456789'); +-- Date field functions over nanosecond-precision values (SPARK-57469). Date fields depend only +-- on the calendar date, so the precision, time-of-day and sub-microsecond digits never affect the +-- result; the values below exercise leap years, ISO-week and quarter boundaries, pre-epoch and +-- far-past dates, and varied precisions / fractions. Columns are year, quarter, month, day, +-- dayofyear, dayofweek (1=Sun..7=Sat), weekday (0=Mon..6=Sun), weekofyear (ISO), yearofweek (ISO). 
+SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-02-29 23:59:59.999999999') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '1900-02-28 12:00:00.000000001') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2016-01-01 06:30:00.123456789') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-03-31 13:24:35.123456789') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-12-31 23:59:59.999999999') AS t(v); +-- Pre-epoch and far-past dates exercise the negative-epoch / minimum-date path. 
+SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '1960-07-15 06:07:08.123456789') AS t(v); +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '0001-01-01 00:00:00.000000001') AS t(v); + +-- Precision (7/8/9) and fraction invariance: the same date read at different precisions and +-- fractions yields identical date fields. +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000001' :: timestamp_ntz(7)) AS t(v); +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.999999999' :: timestamp_ntz(8)) AS t(v); +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000000' :: timestamp_ntz(9)) AS t(v); + +-- EXTRACT / date_part date components (rewrite transitively to the same functions). +SELECT extract(YEAR FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(MONTH FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(DAY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(DOY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789'); +SELECT extract(WEEK FROM TIMESTAMP_NTZ '2021-01-01 12:00:00.123456789'); +SELECT date_part('QUARTER', TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001'); +SELECT date_part('DOW', TIMESTAMP_NTZ '2020-02-29 00:00:00.000000001'); +SELECT date_part('YEAROFWEEK', TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001'); + +-- NULL nanosecond timestamp. +SELECT year(NULL :: timestamp_ntz(9)), month(NULL :: timestamp_ntz(9)); + -- DATE <-> TIMESTAMP_NTZ(p) casts (SPARK-57323): midnight UTC / date extraction (zone-independent). -- Nanosecond typed literals derive precision from the fractional digits (SPARK-57250). 
SELECT DATE '2020-01-01'::timestamp_ntz(9); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index 0a216023966b5..4b1f02b792d1d 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -351,6 +351,202 @@ struct +-- !query output +2020 1 2 29 60 7 5 9 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '1900-02-28 12:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +1900 1 2 28 59 4 2 9 1900 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +2021 1 1 1 1 6 4 53 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2016-01-01 06:30:00.123456789') AS t(v) +-- !query schema +struct +-- !query output +2016 1 1 1 1 6 4 53 2015 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '2020-12-31 23:59:59.999999999') AS t(v) +-- !query schema +struct +-- !query output +2020 4 12 31 366 5 3 53 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '1960-07-15 06:07:08.123456789') AS t(v) +-- !query schema +struct +-- !query output +1960 3 7 15 197 6 4 28 1960 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), 
dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_LTZ '0001-01-01 00:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +1 1 1 1 1 2 0 1 1 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000001' :: timestamp_ltz(7)) AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.999999999' :: timestamp_ltz(8)) AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000000' :: timestamp_ltz(9)) AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- !query +SELECT year(v), month(v), day(v) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 04:00:00.123456789 UTC') AS t(v) +-- !query schema +struct +-- !query output +2019 12 31 + + +-- !query +SELECT year(v), month(v), day(v) FROM VALUES + (TIMESTAMP_LTZ '2020-01-01 04:00:00.123456789') AS t(v) +-- !query schema +struct +-- !query output +2020 1 1 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + (TIMESTAMP_LTZ '2020-03-01 06:00:00.123456789 Asia/Kolkata') AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- !query +SELECT extract(YEAR FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +2020 + + +-- !query +SELECT extract(MONTH FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT extract(DAY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +29 + + +-- !query +SELECT extract(DOY FROM TIMESTAMP_LTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +60 + + +-- !query +SELECT extract(WEEK FROM TIMESTAMP_LTZ '2021-01-01 12:00:00.123456789') +-- !query schema 
+struct +-- !query output +53 + + +-- !query +SELECT date_part('QUARTER', TIMESTAMP_LTZ '2020-04-01 00:00:00.000000001') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT date_part('DOW', TIMESTAMP_LTZ '2020-02-29 00:00:00.000000001') +-- !query schema +struct +-- !query output +7 + + +-- !query +SELECT date_part('YEAROFWEEK', TIMESTAMP_LTZ '2021-01-01 00:00:00.000000001') +-- !query schema +struct +-- !query output +2020 + + +-- !query +SELECT year(NULL :: timestamp_ltz(9)), month(NULL :: timestamp_ltz(9)) +-- !query schema +struct +-- !query output +NULL NULL + + -- !query SELECT DATE '2020-01-01'::timestamp_ltz(9) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out index 551786284e815..ee134ef9ce696 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out @@ -287,6 +287,195 @@ struct +-- !query output +2020 1 2 29 60 7 5 9 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '1900-02-28 12:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +1900 1 2 28 59 4 2 9 1900 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +2021 1 1 1 1 6 4 53 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2016-01-01 06:30:00.123456789') AS t(v) +-- !query schema +struct +-- !query output +2016 1 1 1 1 6 4 53 2015 + + +-- !query +SELECT year(v), 
quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-03-31 13:24:35.123456789') AS t(v) +-- !query schema +struct +-- !query output +2020 1 3 31 91 3 1 14 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +2020 2 4 1 92 4 2 14 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '2020-12-31 23:59:59.999999999') AS t(v) +-- !query schema +struct +-- !query output +2020 4 12 31 366 5 3 53 2020 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '1960-07-15 06:07:08.123456789') AS t(v) +-- !query schema +struct +-- !query output +1960 3 7 15 197 6 4 28 1960 + + +-- !query +SELECT year(v), quarter(v), month(v), day(v), dayofyear(v), dayofweek(v), weekday(v), + weekofyear(v), extract(YEAROFWEEK FROM v) + FROM VALUES (TIMESTAMP_NTZ '0001-01-01 00:00:00.000000001') AS t(v) +-- !query schema +struct +-- !query output +1 1 1 1 1 2 0 1 1 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000001' :: timestamp_ntz(7)) AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.999999999' :: timestamp_ntz(8)) AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- !query +SELECT year(v), month(v), day(v), dayofyear(v) FROM VALUES + ('2020-02-29 13:24:35.000000000' :: timestamp_ntz(9)) AS t(v) +-- !query schema +struct +-- !query output +2020 2 29 60 + + +-- 
!query +SELECT extract(YEAR FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +2020 + + +-- !query +SELECT extract(MONTH FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT extract(DAY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +29 + + +-- !query +SELECT extract(DOY FROM TIMESTAMP_NTZ '2020-02-29 12:00:00.123456789') +-- !query schema +struct +-- !query output +60 + + +-- !query +SELECT extract(WEEK FROM TIMESTAMP_NTZ '2021-01-01 12:00:00.123456789') +-- !query schema +struct +-- !query output +53 + + +-- !query +SELECT date_part('QUARTER', TIMESTAMP_NTZ '2020-04-01 00:00:00.000000001') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT date_part('DOW', TIMESTAMP_NTZ '2020-02-29 00:00:00.000000001') +-- !query schema +struct +-- !query output +7 + + +-- !query +SELECT date_part('YEAROFWEEK', TIMESTAMP_NTZ '2021-01-01 00:00:00.000000001') +-- !query schema +struct +-- !query output +2020 + + +-- !query +SELECT year(NULL :: timestamp_ntz(9)), month(NULL :: timestamp_ntz(9)) +-- !query schema +struct +-- !query output +NULL NULL + + -- !query SELECT DATE '2020-01-01'::timestamp_ntz(9) -- !query schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala index 8031ed5cf7c2e..8b47efb85f0d2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala @@ -146,6 +146,99 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { df.selectExpr("extract(SECOND FROM ntz)"), Row(new java.math.BigDecimal("7.123456789"))) } + + // Date field extraction functions, plus the EXTRACT / date_part date components that rewrite + 
// to them. The fields depend only on the calendar date, so the precision, time-of-day and + // sub-microsecond digits never change the result. + private val dateFieldExprs = Seq( + "year(c)", "quarter(c)", "month(c)", "day(c)", "dayofmonth(c)", "dayofyear(c)", + "dayofweek(c)", "weekday(c)", "weekofyear(c)", + "extract(YEAR FROM c)", "extract(MONTH FROM c)", "extract(DAY FROM c)", + "extract(DOY FROM c)", "extract(WEEK FROM c)", "extract(DOW FROM c)", + "extract(YEAROFWEEK FROM c)", "date_part('QUARTER', c)", "date_part('DOY', c)") + + private def ntzNanos(ldt: String, precision: Int): DataFrame = + spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(LocalDateTime.parse(ldt)))), + new StructType().add("c", TimestampNTZNanosType(precision))) + + private def ltzNanos(instant: String, precision: Int): DataFrame = + spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(Instant.parse(instant)))), + new StructType().add("c", TimestampLTZNanosType(precision))) + + test("SPARK-57469: date field functions over nanosecond TIMESTAMP_NTZ match the micro path") { + // Leap day, an ISO-week boundary (week 53 of 2020), a quarter boundary, and a pre-epoch date. + val ldts = Seq( + "2020-02-29T12:34:56.123456789", + "2021-01-01T00:00:00.000000001", + "2020-04-01T23:59:59.999999999", + "1960-07-15T06:07:08.123456789") + Seq(7, 8, 9).foreach { p => + ldts.foreach { s => + val nanos = ntzNanos(s, p) + val micro = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(LocalDateTime.parse(s)))), + new StructType().add("c", TimestampNTZType)) + checkAnswer(nanos.selectExpr(dateFieldExprs: _*), micro.selectExpr(dateFieldExprs: _*)) + } + } + } + + test("SPARK-57469: date field functions over nanosecond TIMESTAMP_LTZ match the micro path") { + // The second and third instants roll back to the previous calendar day in the session zone + // (America/Los_Angeles), crossing a year and a quarter boundary respectively. 
+ val instants = Seq( + "2020-02-29T12:34:56.123456789Z", + "2020-01-01T04:00:00.123456789Z", + "2020-04-01T06:00:00.123456789Z", + "1960-07-15T06:07:08.123456789Z") + Seq(7, 8, 9).foreach { p => + instants.foreach { s => + val nanos = ltzNanos(s, p) + val micro = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(Instant.parse(s)))), + new StructType().add("c", TimestampType)) + checkAnswer(nanos.selectExpr(dateFieldExprs: _*), micro.selectExpr(dateFieldExprs: _*)) + } + } + } + + test("SPARK-57469: date field corner cases over nanosecond TIMESTAMP_NTZ") { + // year, quarter, month, day, dayofyear, dayofweek (1=Sun..7=Sat), weekday (0=Mon..6=Sun), + // weekofyear (ISO), yearofweek (ISO week-based year). + val fields = Seq("year(c)", "quarter(c)", "month(c)", "day(c)", "dayofyear(c)", + "dayofweek(c)", "weekday(c)", "weekofyear(c)", "extract(YEAROFWEEK FROM c)") + Seq(7, 8, 9).foreach { p => + // 2020-02-29 is a Saturday in the leap year 2020: day 60, ISO week 9. + checkAnswer( + ntzNanos("2020-02-29T23:59:59.999999999", p).selectExpr(fields: _*), + Row(2020, 1, 2, 29, 60, 7, 5, 9, 2020)) + // 2021-01-01 is a Friday that belongs to ISO week 53 of 2020. 
+ checkAnswer( + ntzNanos("2021-01-01T00:00:00.000000001", p).selectExpr(fields: _*), + Row(2021, 1, 1, 1, 1, 6, 4, 53, 2020)) + } + } + + test("SPARK-57469: date field functions match the functions.* Column API over nanos") { + val df = ntzNanos("2020-02-29T12:34:56.123456789", 9) + checkAnswer( + df.selectExpr("year(c)", "month(c)", "dayofmonth(c)", "dayofweek(c)", "weekofyear(c)"), + df.select(year(col("c")), month(col("c")), dayofmonth(col("c")), + dayofweek(col("c")), weekofyear(col("c")))) + } + + test("SPARK-57469: date field functions over NULL nanosecond timestamps") { + Seq(7, 8, 9).foreach { p => + val ntz = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(null))), + new StructType().add("c", TimestampNTZNanosType(p))) + checkAnswer( + ntz.selectExpr("year(c)", "month(c)", "day(c)", "extract(DOY FROM c)"), + Row(null, null, null, null)) + } + } } // Runs the nanosecond timestamp function tests with ANSI mode enabled explicitly.