@@ -528,6 +528,17 @@ def compute_time_variable_name(dataset, lat_var):
528
528
raise ValueError ('Unable to determine time variable' )
529
529
530
530
531
def compute_utc_name(dataset):
    """
    Get the name of the UTC time variable, if present, used to determine origin time.

    Parameters
    ----------
    dataset : xr.Dataset
        Dataset whose data variables are searched.

    Returns
    -------
    str or None
        The first data-variable name containing both 'utc' and 'time'
        (case-insensitive), or None if no such variable exists.
    """
    # Iterate data_vars directly; no need to materialize the key list.
    for var_name in dataset.data_vars:
        if 'utc' in var_name.lower() and 'time' in var_name.lower():
            return var_name

    return None
540
+
541
+
531
542
def get_time_epoch_var (dataset , time_var_name ):
532
543
"""
533
544
Get the name of the epoch time var. This is only needed in the case
@@ -686,7 +697,6 @@ def build_cond(str_timestamp, compare):
686
697
timestamp = pd .to_datetime (timestamp )
687
698
if np .issubdtype (dataset [time_var_name ].dtype , np .dtype (np .timedelta64 )):
688
699
if is_time_mjd (dataset , time_var_name ):
689
- # mjd when timedelta based on
690
700
mjd_datetime = datetime_from_mjd (dataset , time_var_name )
691
701
if mjd_datetime is None :
692
702
raise ValueError ('Unable to get datetime from dataset to calculate time delta' )
@@ -698,10 +708,6 @@ def build_cond(str_timestamp, compare):
698
708
epoch_datetime = dataset [epoch_time_var_name ].values [0 ]
699
709
timestamp = np .datetime64 (timestamp ) - epoch_datetime
700
710
701
- if np .issubdtype (dataset [time_var_name ].dtype , np .dtype (float )):
702
- start_date = np .datetime64 (dataset [time_var_name ].attrs ['Units' ][- 10 :])
703
- timestamp = (np .datetime64 (timestamp ) - start_date ).astype ('timedelta64[s]' ).astype ('float' )
704
-
705
711
return compare (dataset [time_var_name ], timestamp )
706
712
707
713
temporal_conds = []
@@ -1003,13 +1009,12 @@ def _rename_variables(dataset, base_dataset):
1003
1009
var_group = _get_nested_group (base_dataset , var_name )
1004
1010
variable = dataset .variables [var_name ]
1005
1011
var_dims = [x .split (GROUP_DELIM )[- 1 ] for x in dataset .variables [var_name ].dims ]
1006
-
1007
1012
if np .issubdtype (
1008
1013
dataset .variables [var_name ].dtype , np .dtype (np .datetime64 )
1009
1014
) or np .issubdtype (
1010
1015
dataset .variables [var_name ].dtype , np .dtype (np .timedelta64 )
1011
1016
):
1012
- # Use xarray datetime encoder
1017
+
1013
1018
cf_dt_coder = xr .coding .times .CFDatetimeCoder ()
1014
1019
encoded_var = cf_dt_coder .encode (dataset .variables [var_name ])
1015
1020
variable = encoded_var
@@ -1102,6 +1107,7 @@ def get_coordinate_variable_names(dataset, lat_var_names=None, lon_var_names=Non
1102
1107
time_var_names : list
1103
1108
List of time coordinate variables.
1104
1109
"""
1110
+
1105
1111
if not lat_var_names or not lon_var_names :
1106
1112
lat_var_names , lon_var_names = compute_coordinate_variable_names (dataset )
1107
1113
if not time_var_names :
@@ -1110,9 +1116,35 @@ def get_coordinate_variable_names(dataset, lat_var_names=None, lon_var_names=Non
1110
1116
dataset , dataset [lat_var_name ]
1111
1117
) for lat_var_name in lat_var_names
1112
1118
]
1119
+ time_var_names .append (compute_utc_name (dataset ))
1120
+ time_var_names = list (dict .fromkeys ([x for x in time_var_names if x is not None ])) # remove Nones and any duplicates
1121
+
1113
1122
return lat_var_names , lon_var_names , time_var_names
1114
1123
1115
1124
1125
def convert_to_datetime(dataset, time_vars):
    """
    Convert float-typed time variables to datetime objects when xarray
    did not decode them automatically.

    For each variable in ``time_vars`` whose dtype is float:
      - if any attribute mentions 'TAI93', the values are interpreted as
        seconds since the TAI93 epoch (1993-01-01T00:00:00);
      - otherwise, if a UTC time variable exists (see ``compute_utc_name``),
        its [year, month, day, hour, minute, second] rows are used to build
        datetimes — NOTE(review): assumes that row layout; confirm against
        the producing datasets.
    Variables that are neither float nor matched above are left untouched.

    Parameters
    ----------
    dataset : xr.Dataset
        Dataset to modify in place (also returned for convenience).
    time_vars : list of str
        Names of the time variables to convert.

    Returns
    -------
    xr.Dataset
        The same dataset, with converted time values where applicable.
    """
    for var in time_vars:
        time_da = dataset[var]
        # Only float-typed (undecoded) time variables need conversion.
        if not np.issubdtype(time_da.dtype, np.dtype(float)):
            continue

        tai93_based = any(
            'TAI93' in str(time_da.attrs[attribute_name])
            for attribute_name in time_da.attrs
        )
        if tai93_based:
            # Offset each value (seconds) from the TAI93 epoch.
            start_date = datetime.datetime.strptime(
                "1993-01-01T00:00:00.00", "%Y-%m-%dT%H:%M:%S.%f"
            )
            time_da.values = [
                start_date + datetime.timedelta(seconds=i) for i in time_da.values
            ]
        else:
            # Fall back to rebuilding datetimes from the UTC component variable.
            utc_var_name = compute_utc_name(dataset)
            if utc_var_name:
                time_da.values = [
                    datetime.datetime(
                        i[0], i[1], i[2], hour=i[3], minute=i[4], second=i[5]
                    )
                    for i in dataset[utc_var_name].values
                ]

    return dataset
1146
+
1147
+
1116
1148
def subset (file_to_subset , bbox , output_file , variables = None ,
1117
1149
# pylint: disable=too-many-branches, disable=too-many-statements
1118
1150
cut = True , shapefile = None , min_time = None , max_time = None , origin_source = None ,
@@ -1169,7 +1201,6 @@ def subset(file_to_subset, bbox, output_file, variables=None,
1169
1201
1170
1202
if file_extension == 'he5' :
1171
1203
nc_dataset , has_groups = h5file_transform (file_to_subset )
1172
-
1173
1204
else :
1174
1205
# Open dataset with netCDF4 first, so we can get group info
1175
1206
nc_dataset = nc .Dataset (file_to_subset , mode = 'r' )
@@ -1202,11 +1233,11 @@ def subset(file_to_subset, bbox, output_file, variables=None,
1202
1233
lon_var_names = lon_var_names ,
1203
1234
time_var_names = time_var_names
1204
1235
)
1205
-
1236
+ if min_time or max_time :
1237
+ dataset = convert_to_datetime (dataset , time_var_names )
1206
1238
chunks = calculate_chunks (dataset )
1207
1239
if chunks :
1208
1240
dataset = dataset .chunk (chunks )
1209
-
1210
1241
if variables :
1211
1242
# Drop variables that aren't explicitly requested, except lat_var_name and
1212
1243
# lon_var_name which are needed for subsetting
@@ -1266,7 +1297,6 @@ def subset(file_to_subset, bbox, output_file, variables=None,
1266
1297
} for var_name in time_var_names
1267
1298
if 'units' in nc_dataset .variables [var_name ].__dict__
1268
1299
}
1269
-
1270
1300
for var in dataset .data_vars :
1271
1301
if var not in encoding :
1272
1302
encoding [var ] = compression
0 commit comments