From 16af516da53856be6fb97bf47f9048650815d27e Mon Sep 17 00:00:00 2001 From: Sourabh Badhya Date: Fri, 22 Mar 2024 16:29:43 +0530 Subject: [PATCH] HIVE-28087: Iceberg: Timestamp partition columns with transforms are not correctly sorted during insert (#5134) (Sourabh Badhya reviewed by Ayush Saxena, Simhadri Govindappa) --- data/files/query-hive-28087.csv | 47 ++++ .../mr/hive/udf/GenericUDFIcebergDay.java | 4 +- .../mr/hive/udf/GenericUDFIcebergHour.java | 8 +- .../mr/hive/udf/GenericUDFIcebergMonth.java | 4 +- .../mr/hive/udf/GenericUDFIcebergYear.java | 4 +- .../test/queries/positive/iceberg_clustered.q | 57 +++++ .../positive/llap/iceberg_clustered.q.out | 215 ++++++++++++++++++ .../resources/testconfiguration.properties | 2 + .../serde2/io/TimestampLocalTZWritable.java | 16 ++ .../hive/serde2/io/TimestampWritableV2.java | 16 ++ 10 files changed, 363 insertions(+), 10 deletions(-) create mode 100644 data/files/query-hive-28087.csv create mode 100644 iceberg/iceberg-handler/src/test/queries/positive/iceberg_clustered.q create mode 100644 iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered.q.out diff --git a/data/files/query-hive-28087.csv b/data/files/query-hive-28087.csv new file mode 100644 index 000000000000..24ab64eeb234 --- /dev/null +++ b/data/files/query-hive-28087.csv @@ -0,0 +1,47 @@ +vectortab10k.t,vectortab10k.si,vectortab10k.i,vectortab10k.b,vectortab10k.f,vectortab10k.d,vectortab10k.dc,vectortab10k.bo,vectortab10k.s,vectortab10k.s2,vectortab10k.ts,vectortab10k.ts2,vectortab10k.dt +-111,NULL,NULL,2607,5765.39,3882592.64,3785719054585.463867000000000000,True,mathematics,nick xylophone,2065-08-13 19:03:52,2018-11-14 17:26:37.322428,2086-05-13 +-87,21091,-1012329052,9182828596851990528,22311.9,2883584.5,3563455992720.427734000000000000,True,quiet hour,david quirinius,2047-08-30 02:59:28,2062-07-07 09:32:33.387727,2083-11-21 +-58,-31395,-1730740504,302,11322.18,-2081693.61,-1639547871112.334473000000000000,True,industrial 
engineering,xavier laertes,2057-09-02 10:26:29,2074-04-07 03:59:19.110762,NULL +-19,133,95356298,78,-15490.77,-642668.92,NULL,True,american history,tom ovid,2059-06-14 15:58:37,2021-07-06 11:56:50.113924,NULL +95,27922,-2027812975,3764,-38980.4,-497550.81,4616197451686.900391000000000000,False,education,wendy davidson,2031-12-06 02:13:55,2037-04-01 15:48:02.70772,2013-05-01 +23,-9171,-805288503,7432428551399669760,-35391.71,-1387292.76,-4384441919303.938477000000000000,NULL,nap time,xavier hernandez,2069-02-22 06:47:15,2068-04-02 03:24:11.192596,2048-06-21 +7,725,NULL,3118,-12519.93,-1305193.84,3984781579512.529297000000000000,True,forestry,david davidson,2071-01-16 20:21:17,2049-05-02 13:53:42.230477,2030-02-20 +52,8918,-772236518,7250237407877382144,32666.2,-3997512.4,-4361210532142.081055000000000000,True,religion,,2068-10-07 03:23:30,NULL,2010-12-13 +54,-30304,-146961490,8100036735858401280,38097.34,300734.43,-101907680156.507812000000000000,False,wind surfing,alice ellison,2076-07-17 05:35:58,2017-10-19 13:37:33.310745,1985-06-25 +36,-10317,-538812082,7792036342592348160,36810.38,1666710.2,1617263763890.299805000000000000,True,debate,xavier king,NULL,2049-11-23 23:25:42.906731,2025-10-11 +-92,21849,868714547,-7496839341561954304,32711.55,-2738225.86,-946920531679.183105000000000000,False,biology,zach falkner,2064-05-11 04:54:01,2041-04-22 19:12:27.123637,2020-04-05 +-127,7353,1008698636,1719,4983.6,-2636583.28,2290285251062.656250000000000000,False,american history,ethan polk,2029-05-28 08:57:08,2071-11-04 02:44:19.546016,2048-06-29 +73,16195,737149747,8283099811330506752,5758.0,2121137.53,-3867208889795.392578000000000000,True,philosophy,sarah davidson,2061-05-23 23:39:02,2064-12-20 12:52:14.436538,2053-04-28 +-46,-28501,810157660,2241,-7769.3,-3860829.99,4232305555838.919922000000000000,True,biology,wendy falkner,2054-06-13 14:12:16,2044-03-24 22:32:03.722424,NULL 
+31,-9609,1541249928,7659279803863146496,-32124.85,1848079.75,-1733147315988.326172000000000000,True,philosophy,calvin miller,2058-02-09 03:37:29,2039-10-26 21:00:53.243928,2030-11-17 +-35,NULL,NULL,1075,21713.21,-4409416.25,-433003793989.392578000000000000,False,opthamology,,2026-04-19 20:19:23,2060-01-21 11:16:28.358392,1983-12-12 +48,7401,476704350,-7911421221625077760,-23364.57,393045.55,-3706576226699.773438000000000000,False,zync studies,sarah polk,2064-11-08 08:38:31,2047-09-13 00:05:07.65722,2073-09-23 +-29,14773,-1974777102,1774,-41874.29,360339.96,4724948953312.201172000000000000,False,undecided,oscar ellison,2058-08-24 22:30:53,2023-10-22 21:22:34.254262,2064-04-02 +-57,30921,-522450861,8895174927321243648,-7382.06,-4926003.8,4192159288015.525391000000000000,True,topology,david white,2027-03-09 17:30:45,2032-03-22 06:16:46.756458,2077-08-03 +NULL,26557,-407089271,661,-38119.53,-51958.2,-4230513318073.718750000000000000,NULL,quiet hour,wendy carson,2016-08-23 22:18:05,2079-03-05 14:50:42.389412,2102-03-14 +-4,NULL,273256071,-9014145341570203648,-14145.11,-3116102.1,2704318642566.079102000000000000,False,,ulysses white,2050-06-25 15:49:09,2013-10-28 19:25:59.706386,2032-03-19 +47,23441,-978892011,-7049618574399692800,1466.49,-2506000.67,-1261885101463.384766000000000000,False,study skills,xavier underhill,2053-03-08 05:42:09,2059-11-24 17:10:42.836803,2012-02-18 +55,-1928,174310705,-6935038507792801792,-4574.16,-2096084.32,NULL,False,topology,tom king,2068-11-10 06:35:37,2075-12-17 14:54:53.48425,2010-11-13 +-43,2671,851975276,504,35338.95,2409466.9,-4397204039138.387207000000000000,False,industrial engineering,xavier xylophone,2044-09-15 08:07:20,2066-11-14 20:08:56.457818,2097-09-25 +40,-21772,-1832606512,-8831091081349758976,8799.89,1118606.84,868268330921.963867000000000000,False,wind surfing,jessica van buren,2031-08-02 13:03:07,2066-10-02 06:56:23.275227,2089-06-05 
+-37,20704,-1506324615,923,NULL,-4894306.72,3900883250375.564453000000000000,False,education,alice ellison,2078-01-19 06:32:53,2034-07-13 13:55:48.52438,NULL +NULL,NULL,-419335927,3263,-9973.58,379816.01,-4708070356227.338867000000000000,True,religion,sarah van buren,2061-02-13 08:24:35,2074-03-23 07:11:45.666149,1994-04-22 +114,-2828,-1144976744,289,-21745.11,1431965.1,-654012413621.529785000000000000,True,history,katie robinson,2075-12-28 00:49:24,2028-08-01 10:04:22.203514,2047-09-09 +-103,-20934,1107757211,8509508263705477120,NULL,507403.73,-4227549209611.147949000000000000,NULL,zync studies,zach robinson,2062-09-02 06:50:21,NULL,2084-08-14 +-87,NULL,NULL,345,-31059.05,-4822074.38,2868093144922.561523000000000000,True,education,zach miller,2055-05-09 16:32:47,2015-11-28 12:47:47.540035,2049-05-06 +83,-5635,-42151403,281,5719.35,-1928099.64,3951352468527.070312000000000000,False,philosophy,calvin young,2016-07-13 04:11:42,2025-05-12 16:31:50.778958,2036-08-04 +-51,-20411,929560791,3637,48956.95,-1601399.55,1334219192158.307617000000000000,True,values clariffication,david polk,2032-08-10 12:27:00,2059-05-03 02:46:31.520651,2048-12-21 +-86,8488,-425196209,-9066993118333706240,28889.32,3917802.65,NULL,True,opthamology,sarah nixon,2050-01-29 20:33:45,2052-07-20 02:09:30.619526,2029-03-12 +77,-11232,1426152053,7354813692542304256,2007.63,-4307343.3,4686486642663.113281000000000000,False,study skills,bob falkner,2018-03-02 05:13:42,2070-06-20 06:01:30.717133,2042-09-29 +NULL,NULL,546555204,2029,6253.69,2528579.3,-3261089774688.485840000000000000,False,,victor thompson,NULL,2060-02-01 14:16:38.477867,2081-09-16 +-44,-16218,NULL,-8807361476639629312,36511.51,286248.72,2844066824227.447266000000000000,False,religion,xavier van buren,2039-09-01 16:08:42,2065-11-28 01:54:34.188054,2039-07-18 +-23,27169,821316302,1371,-21871.99,-3722153.69,-545562668559.717773000000000000,False,undecided,mike brown,2063-04-07 13:36:45,2069-02-11 21:42:06.394121,2054-04-18 
+-75,23220,-1421860505,-7221474017515347968,-11848.57,-4271448.15,-1871570979952.359375000000000000,False,mathematics,yuri nixon,2025-10-02 14:38:27,2014-08-18 11:46:05.790786,2037-05-04 +23,NULL,-677778959,8871707618793996288,-37545.98,2442119.38,-555577054032.596680000000000000,False,kindergarten,calvin quirinius,2041-09-15 03:23:20,2017-07-16 08:23:28.728239,2014-06-03 +14,-20192,563507584,7198687580227043328,36052.76,-1306614.08,-1346869656422.100586000000000000,True,chemistry,katie robinson,2063-06-16 08:30:34,2057-04-22 20:48:38.16105,2080-07-26 +-71,30090,1050809633,1990,32100.4,1500446.78,3795668572538.915039000000000000,False,wind surfing,tom garcia,2053-07-13 17:41:24,2019-12-19 14:37:16.693138,2018-01-30 +-96,NULL,-1565671389,8984935029383389184,28720.33,-3180309.64,4280974782610.193359000000000000,False,xylophone band,bob underhill,2078-08-06 00:23:13,2034-07-26 23:30:44.175867,1989-03-16 +74,23177,1910930064,815,45521.78,-4243565.5,-2179991053030.186035000000000000,True,religion,jessica steinbeck,2027-08-11 19:58:53,2025-12-23 12:56:48.785116,2076-03-16 +-6,21162,1941527322,2056,18542.6,96721.07,60247412756.062500000000000000,True,values clariffication,katie ellison,2027-03-11 08:49:50,2014-05-26 15:11:45.90866,2099-05-09 +20,-24115,684561551,8290944180915871744,-32692.85,-793214.39,158268139075.410156000000000000,True,philosophy,oscar van buren,2071-11-06 04:15:33,2078-07-06 07:11:57.809667,2002-07-08 +121,-18533,167432368,228,-49580.62,-1199504.1,-4410893311038.144531000000000000,True,wind surfing,priscilla hernandez,2049-03-14 00:55:26,2015-08-24 11:31:28.973482,2038-10-27 diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergDay.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergDay.java index 344961222a2d..fcfada974ccc 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergDay.java +++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergDay.java @@ -87,7 +87,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampTransform = Transforms.day().bind(Types.TimestampType.withoutZone()); evaluator = arg -> { TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get()); - result.set(timestampTransform.apply(val.getNanos() / 1000L)); + result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; @@ -97,7 +97,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampLocalTzTransform = Transforms.day().bind(Types.TimestampType.withZone()); evaluator = arg -> { TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get()); - result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L)); + result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergHour.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergHour.java index 9457d56e22cf..879e74b2a99b 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergHour.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergHour.java @@ -58,12 +58,12 @@ private interface UDFEvalFunction { public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 1) { throw new UDFArgumentLengthException( - "ICEBERG_YEAR requires 1 arguments (value), but got " + arguments.length); + "ICEBERG_HOUR requires 1 arguments (value), but got " + arguments.length); } if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentException( - "ICEBERG_YEAR first argument takes primitive types, got 
" + argumentOI.getTypeName()); + "ICEBERG_HOUR first argument takes primitive types, got " + argumentOI.getTypeName()); } argumentOI = (PrimitiveObjectInspector) arguments[0]; @@ -76,7 +76,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampTransform = Transforms.hour().bind(Types.TimestampType.withoutZone()); evaluator = arg -> { TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get()); - result.set(timestampTransform.apply(val.getNanos() / 1000L)); + result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; @@ -86,7 +86,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampLocalTzTransform = Transforms.hour().bind(Types.TimestampType.withZone()); evaluator = arg -> { TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get()); - result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L)); + result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergMonth.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergMonth.java index 196f5dc6d55d..f5e45f7d6f8c 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergMonth.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergMonth.java @@ -87,7 +87,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampTransform = Transforms.month().bind(Types.TimestampType.withoutZone()); evaluator = arg -> { TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get()); - result.set(timestampTransform.apply(val.getNanos() / 1000L)); + result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; 
@@ -97,7 +97,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampLocalTzTransform = Transforms.month().bind(Types.TimestampType.withZone()); evaluator = arg -> { TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get()); - result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L)); + result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergYear.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergYear.java index 19754bfc01d6..372096f03dcd 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergYear.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/udf/GenericUDFIcebergYear.java @@ -87,7 +87,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampTransform = Transforms.year().bind(Types.TimestampType.withoutZone()); evaluator = arg -> { TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get()); - result.set(timestampTransform.apply(val.getNanos() / 1000L)); + result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; @@ -97,7 +97,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen Function timestampLocalTzTransform = Transforms.year().bind(Types.TimestampType.withZone()); evaluator = arg -> { TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get()); - result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L)); + result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue())); }; break; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_clustered.q 
b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_clustered.q new file mode 100644 index 000000000000..319466893549 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_clustered.q @@ -0,0 +1,57 @@ +create database t3; +use t3; + +create table vector1k( + t int, + si int, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + row format delimited fields terminated by ','; + +load data local inpath "../../data/files/query-hive-28087.csv" OVERWRITE into table vector1k; + +create table vectortab10k( + t int, + si int, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored by iceberg + stored as orc; + +insert into vectortab10k select * from vector1k; +select count(*) from vectortab10k ; + +create table partition_transform_year(t int, ts timestamp) partitioned by spec(year(ts)) stored by iceberg; +insert into table partition_transform_year select t, ts from vectortab10k; + +create table partition_transform_month(t int, ts timestamp) partitioned by spec(month(ts)) stored by iceberg; +insert into table partition_transform_month select t, ts from vectortab10k; + +create table partition_transform_day(t int, ts timestamp) partitioned by spec(day(ts)) stored by iceberg; +insert into table partition_transform_day select t, ts from vectortab10k; + +create table partition_transform_hour(t int, ts timestamp) partitioned by spec(hour(ts)) stored by iceberg; +insert into table partition_transform_hour select t, ts from vectortab10k; + +drop table partition_transform_month; +drop table partition_transform_year; +drop table partition_transform_day; +drop table partition_transform_hour; diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered.q.out 
new file mode 100644 index 000000000000..d8e6a176bba0 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered.q.out @@ -0,0 +1,215 @@ +PREHOOK: query: create database t3 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:t3 +POSTHOOK: query: create database t3 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:t3 +PREHOOK: query: use t3 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:t3 +POSTHOOK: query: use t3 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:t3 +PREHOOK: query: create table vector1k( + t int, + si int, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@vector1k +POSTHOOK: query: create table vector1k( + t int, + si int, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@vector1k +PREHOOK: query: load data local inpath "../../data/files/query-hive-28087.csv" OVERWRITE into table vector1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: t3@vector1k +POSTHOOK: query: load data local inpath "../../data/files/query-hive-28087.csv" OVERWRITE into table vector1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: t3@vector1k +PREHOOK: query: create table vectortab10k( + t int, + si int, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored by iceberg + stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@vectortab10k +POSTHOOK: query: create table 
vectortab10k( + t int, + si int, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored by iceberg + stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@vectortab10k +PREHOOK: query: insert into vectortab10k select * from vector1k +PREHOOK: type: QUERY +PREHOOK: Input: t3@vector1k +PREHOOK: Output: t3@vectortab10k +POSTHOOK: query: insert into vectortab10k select * from vector1k +POSTHOOK: type: QUERY +POSTHOOK: Input: t3@vector1k +POSTHOOK: Output: t3@vectortab10k +PREHOOK: query: select count(*) from vectortab10k +PREHOOK: type: QUERY +PREHOOK: Input: t3@vectortab10k +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from vectortab10k +POSTHOOK: type: QUERY +POSTHOOK: Input: t3@vectortab10k +#### A masked pattern was here #### +47 +PREHOOK: query: create table partition_transform_year(t int, ts timestamp) partitioned by spec(year(ts)) stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_year +POSTHOOK: query: create table partition_transform_year(t int, ts timestamp) partitioned by spec(year(ts)) stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_year +PREHOOK: query: insert into table partition_transform_year select t, ts from vectortab10k +PREHOOK: type: QUERY +PREHOOK: Input: t3@vectortab10k +PREHOOK: Output: t3@partition_transform_year +POSTHOOK: query: insert into table partition_transform_year select t, ts from vectortab10k +POSTHOOK: type: QUERY +POSTHOOK: Input: t3@vectortab10k +POSTHOOK: Output: t3@partition_transform_year +PREHOOK: query: create table partition_transform_month(t int, ts timestamp) partitioned by spec(month(ts)) stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_month +POSTHOOK: 
query: create table partition_transform_month(t int, ts timestamp) partitioned by spec(month(ts)) stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_month +PREHOOK: query: insert into table partition_transform_month select t, ts from vectortab10k +PREHOOK: type: QUERY +PREHOOK: Input: t3@vectortab10k +PREHOOK: Output: t3@partition_transform_month +POSTHOOK: query: insert into table partition_transform_month select t, ts from vectortab10k +POSTHOOK: type: QUERY +POSTHOOK: Input: t3@vectortab10k +POSTHOOK: Output: t3@partition_transform_month +PREHOOK: query: create table partition_transform_day(t int, ts timestamp) partitioned by spec(day(ts)) stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_day +POSTHOOK: query: create table partition_transform_day(t int, ts timestamp) partitioned by spec(day(ts)) stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_day +PREHOOK: query: insert into table partition_transform_day select t, ts from vectortab10k +PREHOOK: type: QUERY +PREHOOK: Input: t3@vectortab10k +PREHOOK: Output: t3@partition_transform_day +POSTHOOK: query: insert into table partition_transform_day select t, ts from vectortab10k +POSTHOOK: type: QUERY +POSTHOOK: Input: t3@vectortab10k +POSTHOOK: Output: t3@partition_transform_day +PREHOOK: query: create table partition_transform_hour(t int, ts timestamp) partitioned by spec(hour(ts)) stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_hour +POSTHOOK: query: create table partition_transform_hour(t int, ts timestamp) partitioned by spec(hour(ts)) stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_hour +PREHOOK: query: insert into table partition_transform_hour select t, ts from 
vectortab10k +PREHOOK: type: QUERY +PREHOOK: Input: t3@vectortab10k +PREHOOK: Output: t3@partition_transform_hour +POSTHOOK: query: insert into table partition_transform_hour select t, ts from vectortab10k +POSTHOOK: type: QUERY +POSTHOOK: Input: t3@vectortab10k +POSTHOOK: Output: t3@partition_transform_hour +PREHOOK: query: drop table partition_transform_month +PREHOOK: type: DROPTABLE +PREHOOK: Input: t3@partition_transform_month +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_month +POSTHOOK: query: drop table partition_transform_month +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: t3@partition_transform_month +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_month +PREHOOK: query: drop table partition_transform_year +PREHOOK: type: DROPTABLE +PREHOOK: Input: t3@partition_transform_year +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_year +POSTHOOK: query: drop table partition_transform_year +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: t3@partition_transform_year +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_year +PREHOOK: query: drop table partition_transform_day +PREHOOK: type: DROPTABLE +PREHOOK: Input: t3@partition_transform_day +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_day +POSTHOOK: query: drop table partition_transform_day +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: t3@partition_transform_day +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_day +PREHOOK: query: drop table partition_transform_hour +PREHOOK: type: DROPTABLE +PREHOOK: Input: t3@partition_transform_hour +PREHOOK: Output: database:t3 +PREHOOK: Output: t3@partition_transform_hour +POSTHOOK: query: drop table partition_transform_hour +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: t3@partition_transform_hour +POSTHOOK: Output: database:t3 +POSTHOOK: Output: t3@partition_transform_hour diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties index cda63a9b065d..c50c5a9b77e6 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -410,6 +410,7 @@ erasurecoding.only.query.files=\ erasure_simple.q iceberg.llap.query.files=\ + iceberg_clustered.q,\ llap_iceberg_read_orc.q,\ llap_iceberg_read_parquet.q,\ vectorized_iceberg_read_mixed.q,\ @@ -425,6 +426,7 @@ iceberg.llap.query.compactor.files=\ iceberg_optimize_table_unpartitioned.q iceberg.llap.only.query.files=\ + iceberg_clustered.q,\ llap_iceberg_read_orc.q,\ llap_iceberg_read_parquet.q diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java index 1049c907971f..e612d66fd8ed 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampLocalTZWritable.java @@ -214,6 +214,22 @@ public long getSeconds() { throw new IllegalStateException("Both timestamp and bytes are empty"); } + /** + * Combines the seconds and nanos parts into microseconds (seconds * 1e6 + nanos / 1e3). + * @return double representation of the timestampTZ, accurate to microseconds + */ + public double getMicros() { + double seconds, nanos; + if (bytesEmpty) { + seconds = timestampTZ.getEpochSecond(); + nanos = timestampTZ.getNanos(); + } else { + seconds = getSeconds(); + nanos = getNanos(); + } + return seconds * 1e6 + nanos / 1e3; + } + public int getNanos() { if (!timestampTZEmpty) { return timestampTZ.getNanos(); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java index 9aa7f19ab2a2..f3c731b58a84 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritableV2.java @@ -302,6 +302,22 @@ public double getDouble() { return seconds + nanos
/ 1000000000; } + /** + * Combines the seconds and nanos parts into microseconds (seconds * 1e6 + nanos / 1e3). + * @return double representation of the timestamp, accurate to microseconds + */ + public double getMicros() { + double seconds, nanos; + if (bytesEmpty) { + seconds = timestamp.toEpochSecond(); + nanos = timestamp.getNanos(); + } else { + seconds = getSeconds(); + nanos = getNanos(); + } + return seconds * 1e6 + nanos / 1e3; + } + public static long getLong(Timestamp timestamp) { return timestamp.toEpochSecond(); }