Skip to content

Commit ffb539d

Browse files
committed
Add Var.split_by_season
Bug introduced: The documentation shows the wrong module name for Var.split_by_season. It is ClimaAnalysis.Utils.split_by_season instead of ClimaAnalysis.Var.split_by_season.
1 parent 67d34ff commit ffb539d

File tree

5 files changed

+211
-2
lines changed

5 files changed

+211
-2
lines changed

NEWS.md

+40
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,46 @@ julia> long_name(integrated_var) # updated long name to reflect the data being i
164164
"f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)"
165165
```
166166

167+
### Split by season
168+
`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date
169+
can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`.
170+
The unit of time is expected to be seconds. The function `split_by_season(var)` returns a
171+
vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of
172+
the seasons are March to May, June to August, September to November, and December to
173+
February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for
174+
a season, then the `OutputVar` for that season will be an empty `OutputVar`.
175+
176+
```@julia split_by_season
177+
julia> attribs = Dict("start_date" => "2024-1-1");
178+
179+
julia> time = [0.0, 5_184_000.0, 13_132_800.0]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1
180+
181+
julia> dims = OrderedDict(["time" => time]);
182+
183+
julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second
184+
185+
julia> data = [1.0, 2.0, 3.0];
186+
187+
julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data);
188+
189+
julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var);
190+
191+
julia> ClimaAnalysis.isempty(SON) # empty OutputVar because no dates between September and November
192+
true
193+
194+
julia> [MAM.dims["time"], JJA.dims["time"], DJF.dims["time"]]
195+
3-element Vector{Vector{Float64}}:
196+
[5.184e6]
197+
[1.31328e7]
198+
[0.0]
199+
200+
julia> [MAM.data, JJA.data, DJF.data]
201+
3-element Vector{Vector{Float64}}:
202+
[2.0]
203+
[3.0]
204+
[1.0]
205+
```
206+
167207
## Bug fixes
168208

169209
- Increased the default value for `warp_string` to 72.

docs/src/api.md

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Var.convert_units
5959
Var.integrate_lonlat
6060
Var.integrate_lat
6161
Var.integrate_lon
62+
Var.split_by_season(var::OutputVar)
6263
```
6364

6465
## Utilities

docs/src/var.md

+40
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,43 @@ julia> integrated_var.data # approximately 4π (the surface area of a sphere)
100100
julia> long_name(integrated_var) # updated long name to reflect the data being integrated
101101
"f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)"
102102
```
103+
104+
## Split by season
105+
`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date
106+
can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`.
107+
The unit of time is expected to be seconds. The function `split_by_season(var)` returns a
108+
vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of
109+
the seasons are March to May, June to August, September to November, and December to
110+
February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for
111+
a season, then the `OutputVar` for that season will be an empty `OutputVar`.
112+
113+
```@julia split_by_season
114+
julia> attribs = Dict("start_date" => "2024-1-1");
115+
116+
julia> time = [0.0, 5_184_000.0, 13_132_800.0]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1
117+
118+
julia> dims = OrderedDict(["time" => time]);
119+
120+
julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second
121+
122+
julia> data = [1.0, 2.0, 3.0];
123+
124+
julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data);
125+
126+
julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var);
127+
128+
julia> ClimaAnalysis.isempty(SON) # empty OutputVar because no dates between September and November
129+
true
130+
131+
julia> [MAM.dims["time"], JJA.dims["time"], DJF.dims["time"]]
132+
3-element Vector{Vector{Float64}}:
133+
[5.184e6]
134+
[1.31328e7]
135+
[0.0]
136+
137+
julia> [MAM.data, JJA.data, DJF.data]
138+
3-element Vector{Vector{Float64}}:
139+
[2.0]
140+
[3.0]
141+
[1.0]
142+
```

src/Var.jl

+67-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module Var
22

3+
import Dates
34
import NCDatasets
45
import OrderedCollections: OrderedDict
56

@@ -8,7 +9,14 @@ import Statistics: mean
89
import NaNStatistics: nanmean
910

1011
import ..Numerics
11-
import ..Utils: nearest_index, seconds_to_prettystr, squeeze
12+
import ..Utils:
13+
nearest_index,
14+
seconds_to_prettystr,
15+
squeeze,
16+
split_by_season,
17+
time_to_date,
18+
date_to_time,
19+
_data_at_dim_vals
1220

1321
export OutputVar,
1422
read_var,
@@ -35,7 +43,8 @@ export OutputVar,
3543
integrate_lonlat,
3644
integrate_lon,
3745
integrate_lat,
38-
isempty
46+
isempty,
47+
split_by_season
3948

4049
"""
4150
Representing an output variable
@@ -901,6 +910,62 @@ function _integrate_over_angle(var::OutputVar, integrate_on, angle_dim_name)
901910
return integrated_var
902911
end
903912

913+
914+
"""
915+
split_by_season(var::OutputVar)
916+
917+
Return a vector of four `OutputVar`s split by season.
918+
919+
The months of the seasons are March to May, June to August, September to November, and
920+
December to February. The order of the vector is MAM, JJA, SON, and DJF. If there are no
921+
dates found for a season, then the `OutputVar` for that season will be an empty `OutputVar`.
922+
923+
The function will use the start date in `var.attributes["start_date"]`. The unit of time is
924+
expected to be seconds. Also, the interpolations will be inaccurate in time intervals
925+
outside of their respective season for the returned `OutputVar`s.
926+
"""
927+
function split_by_season(var::OutputVar)
    # Guard clauses: var must have a time dimension whose unit is "s" and must
    # carry a "start_date" attribute
    has_time(var) || error("Time is not a dimension in var")
    dim_units(var, time_name(var)) == "s" ||
        error("Unit for time is not second")
    haskey(var.attributes, "start_date") ||
        error("Start date is not found in var")
    start_date = Dates.DateTime(var.attributes["start_date"])

    # Convert the times of var to dates and group them by season with the
    # method of split_by_season that acts on dates
    dates_by_season = split_by_season(time_to_date.(start_date, times(var)))

    # Build the OutputVar for one season from the dates that fall in it
    function season_var(dates)
        season_time = date_to_time.(start_date, dates)
        season_data = collect(
            _data_at_dim_vals(
                var.data,
                times(var),
                var.dim2index[time_name(var)],
                season_time,
            ),
        )

        # No dates in this season, so the result is an empty OutputVar
        isempty(season_time) &&
            return OutputVar(empty(var.dims), similar(var.data, 0))

        # Deep copy the metadata of var and replace the time dimension with
        # the times of this season
        season_dims = deepcopy(var.dims)
        season_dims[time_name(var)] = season_time
        return OutputVar(
            deepcopy(var.attributes),
            season_dims,
            deepcopy(var.dim_attributes),
            season_data,
        )
    end

    # One OutputVar per season, in the order the seasons were split
    return [season_var(dates) for dates in dates_by_season]
end
968+
904969
"""
905970
overload_binary_op(op)
906971

test/test_Var.jl

+63
Original file line numberDiff line numberDiff line change
@@ -841,3 +841,66 @@ end
841841
var,
842842
)
843843
end
844+
845+
@testset "split_by_season" begin
    # Build a 3D OutputVar (lat, time, lon) whose times fall in DJF, MAM, and JJA
    # relative to the start date 2024-1-1; no time falls in SON
    lon = collect(range(-179.5, 179.5, 360))
    lat = collect(range(-89.5, 89.5, 180))
    time = [0.0]
    push!(time, 5_184_000.0) # correspond to 2024-3-1
    push!(time, 5_184_001.0)
    push!(time, 13_132_800.0) # correspond to 2024-6-1
    push!(time, 13_132_802.0)
    push!(time, 13_132_803.0)
    data = ones(length(lat), length(time), length(lon))
    dims = OrderedDict(["lat" => lat, "time" => time, "lon" => lon])
    attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1")
    dim_attribs = OrderedDict([
        "lat" => Dict("units" => "deg"),
        "time" => Dict("units" => "s"),
        "lon" => Dict("units" => "deg"),
    ])
    var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data)

    MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var)

    # Check size of data
    @test size(MAM.data) == (length(lat), 2, length(lon))
    @test size(JJA.data) == (length(lat), 3, length(lon))
    @test size(SON.data) == (0,)
    @test size(DJF.data) == (length(lat), 1, length(lon))

    # Check times are correct in OutputVars
    @test MAM.dims["time"] == [5_184_000.0, 5_184_001.0]
    @test JJA.dims["time"] == [13_132_800.0, 13_132_802.0, 13_132_803.0]
    @test DJF.dims["time"] == [0.0]

    # Check start date (each comparison needs `@test`; a bare `==` expression is
    # evaluated and discarded, so it would never fail the test set)
    @test MAM.attributes["start_date"] == "2024-1-1"
    @test JJA.attributes["start_date"] == "2024-1-1"
    @test DJF.attributes["start_date"] == "2024-1-1"

    # Check empty OutputVar
    @test isempty(SON)

    # Check error handling
    # Missing start date in the attributes
    attribs_no_start_date = Dict("long_name" => "hi")
    var =
        ClimaAnalysis.OutputVar(attribs_no_start_date, dims, dim_attribs, data)
    @test_throws ErrorException ClimaAnalysis.split_by_season(var)

    # Unit of time is not second
    dim_attribs_no_sec = OrderedDict([
        "lat" => Dict("units" => "deg"),
        "time" => Dict("units" => "min"),
        "lon" => Dict("units" => "deg"),
    ])
    var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs_no_sec, data)
    @test_throws ErrorException ClimaAnalysis.split_by_season(var)

    # Time is not a dimension
    lon = collect(range(-179.5, 179.5, 360))
    data = ones(length(lon))
    dims = OrderedDict(["lon" => lon])
    attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1")
    dim_attribs = OrderedDict(["lon" => Dict("units" => "deg")])
    var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data)
    @test_throws ErrorException ClimaAnalysis.split_by_season(var)
end

0 commit comments

Comments
 (0)