-
Notifications
You must be signed in to change notification settings - Fork 4
/
utils.py
149 lines (124 loc) · 4.47 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyproj
import cartopy.util as cutil
import xarray as xr
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
geodesic = pyproj.Geod(ellps="WGS84")
DATA_DIR = "/g/data/w85/data/tc"
def savefig(filename, *args, **kwargs):
"""
Add a timestamp to each figure when saving
:param str filename: Path to store the figure at
:param args: Additional arguments to pass to `plt.savefig`
:param kwargs: Additional keyword arguments to pass to `plt.savefig`
"""
fig = plt.gcf()
plt.text(0.99, 0.0, f"Created: {datetime.now():%Y-%m-%d %H:%M %z}",
transform=fig.transFigure, ha='right', va='bottom',
fontsize='xx-small')
plt.savefig(filename, *args, **kwargs)
def load_ibtracs_df(basins=None, season=None):
"""
Helper function to load the IBTrACS database.
By default, we use the BOM_WIND
:param int season: Season to filter data by
:param list basins: select only those TCs from the given basins
"""
dataFile = os.path.join(DATA_DIR, "ibtracs.since1980.list.v04r00.csv")
df = pd.read_csv(
dataFile,
skiprows=[1],
usecols=[0, 1, 3, 5, 6, 8, 9, 10, 11, 13, 23, 95],
keep_default_na=False,
na_values=[" "],
parse_dates=[1],
date_format="%Y-%m-%d %H:%M:%S",
)
df.rename(
columns={
"SID": "DISTURBANCE_ID",
"ISO_TIME": "TM",
"WMO_PRES": "CENTRAL_PRES",
"BOM_WIND": "MAX_WIND_SPD"},
inplace=True,
)
df["TM"] = pd.to_datetime(
df.TM, format="%Y-%m-%d %H:%M:%S", errors="coerce")
df = df[~pd.isnull(df.TM)]
# Filter to every 6 hours:
df["hour"] = df["TM"].dt.hour
df = df[df["hour"].isin([0, 6, 12, 18])]
df.drop(columns=["hour"], inplace=True)
# Move longitudes to [0, 360)
df.loc[df['LON'] < 0, "LON"] = df['LON'] + 360
# Filter by season if given:
df['SEASON'] = df['SEASON'].astype(int)
if season:
df = df[df.SEASON == season]
# IBTrACS includes spur tracks (bits of tracks that are
# different to the official) - these need to be dropped.
df = df[df.TRACK_TYPE.isin(["main", "PROVISIONAL"])]
# Filter by basins if given
if basins:
df = df[df["BASIN"].isin(basins)]
# Fill any missing wind speed reports with data from US records
# We have to convert from a 1-minute sustained wind to a
# 10-minute mean, using the conversions described in WMO TD1555 (2010).
# NOTE:
# 1) I don't handle the case of New Dehli, which uses a 3-minute mean
# wind speed.
# 2) Returns wind speeds in knots (rounded to 2 decimal places)
df.fillna({"MAX_WIND_SPD": df["WMO_WIND"]}, inplace=True)
df.fillna({"MAX_WIND_SPD": df["USA_WIND"] / 1.029}, inplace=True)
df["MAX_WIND_SPD"] = np.round(df["MAX_WIND_SPD"], 2)
df.reset_index(inplace=True)
fwd_azimuth, _, distances = geodesic.inv(
df.LON[:-1],
df.LAT[:-1],
df.LON[1:],
df.LAT[1:],
)
df["new_index"] = np.arange(len(df))
idxs = df.groupby(["DISTURBANCE_ID"]).agg(
{"new_index": "max"}).values.flatten()
df.drop("new_index", axis=1, inplace=True)
dt = np.diff(df.TM).astype(float) / 3_600_000_000_000
u = np.zeros_like(df.LAT)
v = np.zeros_like(df.LAT)
v[:-1] = np.cos(fwd_azimuth * np.pi / 180) * distances / (dt * 1000) / 3.6
u[:-1] = np.sin(fwd_azimuth * np.pi / 180) * distances / (dt * 1000) / 3.6
v[idxs] = 0
u[idxs] = 0
df["u"] = u
df["v"] = v
dt = np.diff(df.TM).astype(float) / 3_600_000_000_000
dt_ = np.zeros(len(df))
dt_[:-1] = dt
df["dt"] = dt_
df = df[df.u != 0].copy()
print(f"Number of TC records: {len(df)}")
return df
def cyclic_wrapper(x, dim="longitude"):
"""
Use cartopy.util.add_cyclic_point with an xarray Dataset to
add a cyclic or wrap-around pixel to the `lon` dimension. This can be useful
for plotting with `cartopy`
So add_cyclic_point() works on 'dim' via xarray Dataset.map()
:param x: `xr.Dataset` to process
:param str dim: Dimension of the dataset to wrap on (default "longitude")
"""
wrap_data, wrap_lon = cutil.add_cyclic_point(
x.values,
coord=x.coords[dim].data,
axis=x.dims.index(dim)
)
return xr.DataArray(
wrap_data,
coords={dim: wrap_lon, **x.drop_vars(dim).coords},
dims=x.dims
)