Skip to content

Commit

Permalink
HIVE-28087: Iceberg: Timestamp partition columns with transforms are …
Browse files Browse the repository at this point in the history
…not correctly sorted during insert (#5134) (Sourabh Badhya reviewed by Ayush Saxena, Simhadri Govindappa)
  • Loading branch information
SourabhBadhya committed Mar 22, 2024
1 parent 8eee4aa commit f216bbb
Show file tree
Hide file tree
Showing 10 changed files with 363 additions and 10 deletions.
47 changes: 47 additions & 0 deletions data/files/query-hive-28087.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
vectortab10k.t,vectortab10k.si,vectortab10k.i,vectortab10k.b,vectortab10k.f,vectortab10k.d,vectortab10k.dc,vectortab10k.bo,vectortab10k.s,vectortab10k.s2,vectortab10k.ts,vectortab10k.ts2,vectortab10k.dt
-111,NULL,NULL,2607,5765.39,3882592.64,3785719054585.463867000000000000,True,mathematics,nick xylophone,2065-08-13 19:03:52,2018-11-14 17:26:37.322428,2086-05-13
-87,21091,-1012329052,9182828596851990528,22311.9,2883584.5,3563455992720.427734000000000000,True,quiet hour,david quirinius,2047-08-30 02:59:28,2062-07-07 09:32:33.387727,2083-11-21
-58,-31395,-1730740504,302,11322.18,-2081693.61,-1639547871112.334473000000000000,True,industrial engineering,xavier laertes,2057-09-02 10:26:29,2074-04-07 03:59:19.110762,NULL
-19,133,95356298,78,-15490.77,-642668.92,NULL,True,american history,tom ovid,2059-06-14 15:58:37,2021-07-06 11:56:50.113924,NULL
95,27922,-2027812975,3764,-38980.4,-497550.81,4616197451686.900391000000000000,False,education,wendy davidson,2031-12-06 02:13:55,2037-04-01 15:48:02.70772,2013-05-01
23,-9171,-805288503,7432428551399669760,-35391.71,-1387292.76,-4384441919303.938477000000000000,NULL,nap time,xavier hernandez,2069-02-22 06:47:15,2068-04-02 03:24:11.192596,2048-06-21
7,725,NULL,3118,-12519.93,-1305193.84,3984781579512.529297000000000000,True,forestry,david davidson,2071-01-16 20:21:17,2049-05-02 13:53:42.230477,2030-02-20
52,8918,-772236518,7250237407877382144,32666.2,-3997512.4,-4361210532142.081055000000000000,True,religion,,2068-10-07 03:23:30,NULL,2010-12-13
54,-30304,-146961490,8100036735858401280,38097.34,300734.43,-101907680156.507812000000000000,False,wind surfing,alice ellison,2076-07-17 05:35:58,2017-10-19 13:37:33.310745,1985-06-25
36,-10317,-538812082,7792036342592348160,36810.38,1666710.2,1617263763890.299805000000000000,True,debate,xavier king,NULL,2049-11-23 23:25:42.906731,2025-10-11
-92,21849,868714547,-7496839341561954304,32711.55,-2738225.86,-946920531679.183105000000000000,False,biology,zach falkner,2064-05-11 04:54:01,2041-04-22 19:12:27.123637,2020-04-05
-127,7353,1008698636,1719,4983.6,-2636583.28,2290285251062.656250000000000000,False,american history,ethan polk,2029-05-28 08:57:08,2071-11-04 02:44:19.546016,2048-06-29
73,16195,737149747,8283099811330506752,5758.0,2121137.53,-3867208889795.392578000000000000,True,philosophy,sarah davidson,2061-05-23 23:39:02,2064-12-20 12:52:14.436538,2053-04-28
-46,-28501,810157660,2241,-7769.3,-3860829.99,4232305555838.919922000000000000,True,biology,wendy falkner,2054-06-13 14:12:16,2044-03-24 22:32:03.722424,NULL
31,-9609,1541249928,7659279803863146496,-32124.85,1848079.75,-1733147315988.326172000000000000,True,philosophy,calvin miller,2058-02-09 03:37:29,2039-10-26 21:00:53.243928,2030-11-17
-35,NULL,NULL,1075,21713.21,-4409416.25,-433003793989.392578000000000000,False,opthamology,,2026-04-19 20:19:23,2060-01-21 11:16:28.358392,1983-12-12
48,7401,476704350,-7911421221625077760,-23364.57,393045.55,-3706576226699.773438000000000000,False,zync studies,sarah polk,2064-11-08 08:38:31,2047-09-13 00:05:07.65722,2073-09-23
-29,14773,-1974777102,1774,-41874.29,360339.96,4724948953312.201172000000000000,False,undecided,oscar ellison,2058-08-24 22:30:53,2023-10-22 21:22:34.254262,2064-04-02
-57,30921,-522450861,8895174927321243648,-7382.06,-4926003.8,4192159288015.525391000000000000,True,topology,david white,2027-03-09 17:30:45,2032-03-22 06:16:46.756458,2077-08-03
NULL,26557,-407089271,661,-38119.53,-51958.2,-4230513318073.718750000000000000,NULL,quiet hour,wendy carson,2016-08-23 22:18:05,2079-03-05 14:50:42.389412,2102-03-14
-4,NULL,273256071,-9014145341570203648,-14145.11,-3116102.1,2704318642566.079102000000000000,False,,ulysses white,2050-06-25 15:49:09,2013-10-28 19:25:59.706386,2032-03-19
47,23441,-978892011,-7049618574399692800,1466.49,-2506000.67,-1261885101463.384766000000000000,False,study skills,xavier underhill,2053-03-08 05:42:09,2059-11-24 17:10:42.836803,2012-02-18
55,-1928,174310705,-6935038507792801792,-4574.16,-2096084.32,NULL,False,topology,tom king,2068-11-10 06:35:37,2075-12-17 14:54:53.48425,2010-11-13
-43,2671,851975276,504,35338.95,2409466.9,-4397204039138.387207000000000000,False,industrial engineering,xavier xylophone,2044-09-15 08:07:20,2066-11-14 20:08:56.457818,2097-09-25
40,-21772,-1832606512,-8831091081349758976,8799.89,1118606.84,868268330921.963867000000000000,False,wind surfing,jessica van buren,2031-08-02 13:03:07,2066-10-02 06:56:23.275227,2089-06-05
-37,20704,-1506324615,923,NULL,-4894306.72,3900883250375.564453000000000000,False,education,alice ellison,2078-01-19 06:32:53,2034-07-13 13:55:48.52438,NULL
NULL,NULL,-419335927,3263,-9973.58,379816.01,-4708070356227.338867000000000000,True,religion,sarah van buren,2061-02-13 08:24:35,2074-03-23 07:11:45.666149,1994-04-22
114,-2828,-1144976744,289,-21745.11,1431965.1,-654012413621.529785000000000000,True,history,katie robinson,2075-12-28 00:49:24,2028-08-01 10:04:22.203514,2047-09-09
-103,-20934,1107757211,8509508263705477120,NULL,507403.73,-4227549209611.147949000000000000,NULL,zync studies,zach robinson,2062-09-02 06:50:21,NULL,2084-08-14
-87,NULL,NULL,345,-31059.05,-4822074.38,2868093144922.561523000000000000,True,education,zach miller,2055-05-09 16:32:47,2015-11-28 12:47:47.540035,2049-05-06
83,-5635,-42151403,281,5719.35,-1928099.64,3951352468527.070312000000000000,False,philosophy,calvin young,2016-07-13 04:11:42,2025-05-12 16:31:50.778958,2036-08-04
-51,-20411,929560791,3637,48956.95,-1601399.55,1334219192158.307617000000000000,True,values clariffication,david polk,2032-08-10 12:27:00,2059-05-03 02:46:31.520651,2048-12-21
-86,8488,-425196209,-9066993118333706240,28889.32,3917802.65,NULL,True,opthamology,sarah nixon,2050-01-29 20:33:45,2052-07-20 02:09:30.619526,2029-03-12
77,-11232,1426152053,7354813692542304256,2007.63,-4307343.3,4686486642663.113281000000000000,False,study skills,bob falkner,2018-03-02 05:13:42,2070-06-20 06:01:30.717133,2042-09-29
NULL,NULL,546555204,2029,6253.69,2528579.3,-3261089774688.485840000000000000,False,,victor thompson,NULL,2060-02-01 14:16:38.477867,2081-09-16
-44,-16218,NULL,-8807361476639629312,36511.51,286248.72,2844066824227.447266000000000000,False,religion,xavier van buren,2039-09-01 16:08:42,2065-11-28 01:54:34.188054,2039-07-18
-23,27169,821316302,1371,-21871.99,-3722153.69,-545562668559.717773000000000000,False,undecided,mike brown,2063-04-07 13:36:45,2069-02-11 21:42:06.394121,2054-04-18
-75,23220,-1421860505,-7221474017515347968,-11848.57,-4271448.15,-1871570979952.359375000000000000,False,mathematics,yuri nixon,2025-10-02 14:38:27,2014-08-18 11:46:05.790786,2037-05-04
23,NULL,-677778959,8871707618793996288,-37545.98,2442119.38,-555577054032.596680000000000000,False,kindergarten,calvin quirinius,2041-09-15 03:23:20,2017-07-16 08:23:28.728239,2014-06-03
14,-20192,563507584,7198687580227043328,36052.76,-1306614.08,-1346869656422.100586000000000000,True,chemistry,katie robinson,2063-06-16 08:30:34,2057-04-22 20:48:38.16105,2080-07-26
-71,30090,1050809633,1990,32100.4,1500446.78,3795668572538.915039000000000000,False,wind surfing,tom garcia,2053-07-13 17:41:24,2019-12-19 14:37:16.693138,2018-01-30
-96,NULL,-1565671389,8984935029383389184,28720.33,-3180309.64,4280974782610.193359000000000000,False,xylophone band,bob underhill,2078-08-06 00:23:13,2034-07-26 23:30:44.175867,1989-03-16
74,23177,1910930064,815,45521.78,-4243565.5,-2179991053030.186035000000000000,True,religion,jessica steinbeck,2027-08-11 19:58:53,2025-12-23 12:56:48.785116,2076-03-16
-6,21162,1941527322,2056,18542.6,96721.07,60247412756.062500000000000000,True,values clariffication,katie ellison,2027-03-11 08:49:50,2014-05-26 15:11:45.90866,2099-05-09
20,-24115,684561551,8290944180915871744,-32692.85,-793214.39,158268139075.410156000000000000,True,philosophy,oscar van buren,2071-11-06 04:15:33,2078-07-06 07:11:57.809667,2002-07-08
121,-18533,167432368,228,-49580.62,-1199504.1,-4410893311038.144531000000000000,True,wind surfing,priscilla hernandez,2049-03-14 00:55:26,2015-08-24 11:31:28.973482,2038-10-27
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampTransform = Transforms.day().bind(Types.TimestampType.withoutZone());
evaluator = arg -> {
TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get());
result.set(timestampTransform.apply(val.getNanos() / 1000L));
result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand All @@ -97,7 +97,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampLocalTzTransform = Transforms.day().bind(Types.TimestampType.withZone());
evaluator = arg -> {
TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get());
result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L));
result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ private interface UDFEvalFunction<T> {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 1) {
throw new UDFArgumentLengthException(
"ICEBERG_YEAR requires 1 arguments (value), but got " + arguments.length);
"ICEBERG_HOUR requires 1 arguments (value), but got " + arguments.length);
}

if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentException(
"ICEBERG_YEAR first argument takes primitive types, got " + argumentOI.getTypeName());
"ICEBERG_HOUR first argument takes primitive types, got " + argumentOI.getTypeName());
}
argumentOI = (PrimitiveObjectInspector) arguments[0];

Expand All @@ -76,7 +76,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampTransform = Transforms.hour().bind(Types.TimestampType.withoutZone());
evaluator = arg -> {
TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get());
result.set(timestampTransform.apply(val.getNanos() / 1000L));
result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand All @@ -86,7 +86,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampLocalTzTransform = Transforms.hour().bind(Types.TimestampType.withZone());
evaluator = arg -> {
TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get());
result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L));
result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampTransform = Transforms.month().bind(Types.TimestampType.withoutZone());
evaluator = arg -> {
TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get());
result.set(timestampTransform.apply(val.getNanos() / 1000L));
result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand All @@ -97,7 +97,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampLocalTzTransform = Transforms.month().bind(Types.TimestampType.withZone());
evaluator = arg -> {
TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get());
result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L));
result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampTransform = Transforms.year().bind(Types.TimestampType.withoutZone());
evaluator = arg -> {
TimestampWritableV2 val = (TimestampWritableV2) converter.convert(arg.get());
result.set(timestampTransform.apply(val.getNanos() / 1000L));
result.set(timestampTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand All @@ -97,7 +97,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
Function<Object, Integer> timestampLocalTzTransform = Transforms.year().bind(Types.TimestampType.withZone());
evaluator = arg -> {
TimestampLocalTZWritable val = (TimestampLocalTZWritable) converter.convert(arg.get());
result.set(timestampLocalTzTransform.apply(val.getNanos() / 1000L));
result.set(timestampLocalTzTransform.apply(Double.valueOf(val.getMicros()).longValue()));
};
break;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
-- Create the t3 database and make it current; the tables below are created in it.
create database t3;
use t3;

-- Staging table whose rows are read as comma-delimited text.
create table vector1k (
    t   int,
    si  int,
    i   int,
    b   bigint,
    f   float,
    d   double,
    dc  decimal(38,18),
    bo  boolean,
    s   string,
    s2  string,
    ts  timestamp,
    ts2 timestamp,
    dt  date
)
row format delimited
    fields terminated by ',';

-- Fill the staging table from the local CSV fixture, overwriting the table's contents.
-- NOTE(review): the fixture starts with a column-header line and no
-- skip.header.line.count property is set here, so that line is presumably
-- loaded as a data row as well -- confirm this is intended.
load data local inpath "../../data/files/query-hive-28087.csv"
    overwrite into table vector1k;

-- Iceberg table (ORC file format) declaring the same columns as vector1k.
create table vectortab10k (
    t   int,
    si  int,
    i   int,
    b   bigint,
    f   float,
    d   double,
    dc  decimal(38,18),
    bo  boolean,
    s   string,
    s2  string,
    ts  timestamp,
    ts2 timestamp,
    dt  date
)
stored by iceberg
stored as orc;

-- Copy every staging row into the Iceberg table. The columns are listed in
-- vector1k's declaration order, i.e. the order "select *" would produce.
insert into vectortab10k
select
    t,
    si,
    i,
    b,
    f,
    d,
    dc,
    bo,
    s,
    s2,
    ts,
    ts2,
    dt
from vector1k;

-- Row count of the Iceberg copy.
select count(*)
from vectortab10k;

-- HIVE-28087: insert into Iceberg tables partitioned by a transform of the
-- timestamp column ts. One table per transform: year, month, day, hour.
-- Each table receives the (t, ts) pairs read from vectortab10k.

-- year(ts)
create table partition_transform_year (
    t  int,
    ts timestamp
)
partitioned by spec (year(ts))
stored by iceberg;

insert into table partition_transform_year
select t, ts
from vectortab10k;

-- month(ts)
create table partition_transform_month (
    t  int,
    ts timestamp
)
partitioned by spec (month(ts))
stored by iceberg;

insert into table partition_transform_month
select t, ts
from vectortab10k;

-- day(ts)
create table partition_transform_day (
    t  int,
    ts timestamp
)
partitioned by spec (day(ts))
stored by iceberg;

insert into table partition_transform_day
select t, ts
from vectortab10k;

-- hour(ts)
create table partition_transform_hour (
    t  int,
    ts timestamp
)
partitioned by spec (hour(ts))
stored by iceberg;

insert into table partition_transform_hour
select t, ts
from vectortab10k;

-- Remove the four partitioned tables created above.
drop table partition_transform_month;
drop table partition_transform_year;
drop table partition_transform_day;
drop table partition_transform_hour;
Loading

0 comments on commit f216bbb

Please sign in to comment.