Skip to content

Commit adb1dd0

Browse files
committed
Add plot_boxplot! and plot_leaderboard! for RMSEs
This commit adds functionality to plot a boxplot for a single `RMSEVariable` and a leaderboard/heatmap for multiple `RMSEVariable`s. For handling NaNs, the box plot filters out NaNs. For the leaderboard, if a NaN is present, the cell corresponding to the NaN will not be filled out.
1 parent 9e8c3e3 commit adb1dd0

7 files changed

+528
-0
lines changed

NEWS.md

+11
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,17 @@ ClimaAnalysis.find_worst_single_model(rmse_var, category_name = "DJF")
383383
ClimaAnalysis.median(rmse_var)
384384
```
385385

386+
#### Plotting RMSEVariable
387+
`RMSEVariable` can be visualized as a box plot or heat map using `plot_boxplot!` and
388+
`plot_leaderboard!`. The function `plot_boxplot!(fig, rmse_var::ClimaAnalysis.RMSEVariable;
389+
model_names = ["CliMA"], ploc = (1, 1), best_and_worst_category_name = "ANN")` makes a box
390+
plot for each category in the `RMSEVariable` and plots any other models as specified by
391+
`model_names`. The function `plot_leaderboard!(fig,
392+
rmse_vars::ClimaAnalysis.RMSEVariable...; ploc = (1, 1), model_names = ["CliMA"],
393+
best_category_name = "ANN")` makes a heatmap of the RMSEs between the variables of interest
394+
and the categories. The values of the heatmap are normalized by dividing by the median
395+
model's RMSEs for each variable.
396+
386397
## Bug fixes
387398

388399
- Increased the default value for `warp_string` to 72.

docs/make.jl

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ makedocs(;
3131
"OutputVars" => "var.md",
3232
"Visualizing OutputVars" => "visualize.md",
3333
"RMSEVariables" => "rmse_var.md",
34+
"Visualizing RMSEVariables" => "visualize_rmse_var.md",
3435
"APIs" => "api.md",
3536
"How do I?" => "howdoi.md",
3637
],

docs/src/api.md

+2
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ Visualize.line_plot1D!
123123
Visualize.sliced_line_plot!
124124
Visualize.sliced_plot!
125125
Visualize.plot!
126+
Visualize.plot_boxplot!
127+
Visualize.plot_leaderboard!
126128
```
127129

128130
## GeoMakie

docs/src/visualize_rmse_var.md

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Visualizing `RMSEVariable`s
2+
3+
Instead of computing summary statistics, it may be more helpful to plot a box plot or a
4+
heatmap. `ClimaAnalysis` provides the functions `plot_boxplot!` and `plot_leaderboard!`
5+
to help visualize the root mean squared errors (RMSEs) in an `RMSEVariable`.
6+
7+
The function [`Visualize.plot_boxplot!`](@ref) makes a box plot for each
8+
category in the `RMSEVariable`. The best model and worst model and any other models in
9+
`model_names` are plotted. The category to find the best and worst model defaults to
10+
"ANN", but can be changed using the parameter `best_and_worst_category_name`.
11+
12+
The function [`Visualize.plot_leaderboard!`](@ref) makes a heatmap of the
13+
RMSEs between the variables of interest and the categories. The best model for each variable
14+
of interest and the models in `model_names` are shown in the heatmap. Similar to
15+
`plot_boxplot!`, the category to find the best model defaults to "ANN", but can be changed
16+
using the parameter `best_category_name`. The values of the heatmap are normalized by
17+
dividing by the median model's RMSEs for each variable.
18+
19+
```@setup plotting
20+
import ClimaAnalysis
21+
import CairoMakie
22+
23+
csv_file_path = "./data/test_csv.csv"
24+
rmse_var_ta = ClimaAnalysis.read_rmses(csv_file_path, "ta")
25+
rmse_var_ta = ClimaAnalysis.add_model(rmse_var_ta, "CliMA", "test1", "test2")
26+
rmse_var_ta[:, :] = [
27+
[10.0 11.0 12.0 13.0 14.0]
28+
[36.0 37.0 38.0 39.0 30.0]
29+
[11.0 12.0 13.0 14.0 15.0]
30+
[13.0 13.0 13.0 13.0 15.0]
31+
[24.0 24.0 24.0 24.0 24.0]
32+
]
33+
ClimaAnalysis.add_unit!(
34+
rmse_var_ta,
35+
Dict(
36+
"ACCESS-ESM1-5" => "K",
37+
"ACCESS-CM2" => "K",
38+
"CliMA" => "K",
39+
"test1" => "K",
40+
"test2" => "K",
41+
),
42+
)
43+
44+
rmse_var_pr = ClimaAnalysis.read_rmses(csv_file_path, "pr")
45+
rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA")
46+
rmse_var_pr[:, :] = [
47+
[6.0 7.0 8.0 9.0 10.0]
48+
[11.0 12.0 13.0 14.0 15.0]
49+
[1.0 2.0 3.0 4.0 11.0]
50+
]
51+
ClimaAnalysis.add_unit!(
52+
rmse_var_pr,
53+
Dict(
54+
"ACCESS-ESM1-5" => "kg m^-2 s^-1",
55+
"ACCESS-CM2" => "kg m^-2 s^-1",
56+
"CliMA" => "kg m^-2 s^-1",
57+
),
58+
)
59+
60+
rmse_var_ha = ClimaAnalysis.read_rmses(csv_file_path, "ha")
61+
rmse_var_ha = ClimaAnalysis.add_model(rmse_var_ha, "CliMA")
62+
rmse_var_ha[:, :] = [
63+
[0.5 1.0 1.5 2.0 2.5]
64+
[6.0 7.0 8.0 9.0 10.0]
65+
[11.0 12.0 13.0 14.0 7.0]
66+
]
67+
ClimaAnalysis.add_unit!(
68+
rmse_var_ha,
69+
Dict(
70+
"ACCESS-ESM1-5" => "m^2 s^-2",
71+
"ACCESS-CM2" => "m^2 s^-2",
72+
"CliMA" => "m^2 s^-2",
73+
),
74+
)
75+
```
76+
77+
```@example plotting
78+
import ClimaAnalysis
79+
import CairoMakie
80+
81+
# Plot box plots
82+
rmse_vars = (rmse_var_ta, rmse_var_pr, rmse_var_ha)
83+
fig = CairoMakie.Figure(; size = (800, 300 * 3 + 400), fontsize = 20)
84+
for i in 1:3
85+
ClimaAnalysis.Visualize.plot_boxplot!(
86+
fig,
87+
rmse_vars[i],
88+
ploc = (i, 1),
89+
best_and_worst_category_name = "ANN",
90+
)
91+
end
92+
93+
# Plot leaderboard
94+
ClimaAnalysis.Visualize.plot_leaderboard!(
95+
fig,
96+
rmse_vars...,
97+
best_category_name = "ANN",
98+
ploc = (4, 1),
99+
)
100+
CairoMakie.save("./assets/boxplot_and_leaderboard.png", fig)
101+
102+
nothing # hide
103+
```
104+
105+
![box plot](./assets/boxplot_and_leaderboard.png)

ext/ClimaAnalysisMakieExt.jl

+236
Original file line numberDiff line numberDiff line change
@@ -630,4 +630,240 @@ function Visualize._constrained_cmap(
630630
return cmap
631631
end
632632

"""
    Visualize.plot_boxplot!(fig,
                            rmse_var::ClimaAnalysis.RMSEVariable;
                            model_names = ["CliMA"],
                            ploc = (1, 1),
                            best_and_worst_category_name = "ANN")

Plot a Tukey style boxplot for each category in `rmse_var`.

The best and worst single models are found for the category `best_and_worst_category_name`
and are plotted on the boxplot. Additionally, any model in `model_names` will also be
plotted on the boxplot. The model named `"CliMA"` is drawn as a green star; any other model
in `model_names` is drawn in red.

NaNs in the RMSEs are filtered out before the boxes are drawn.

The parameter `ploc` determines where to place the plot on the figure.
"""
function Visualize.plot_boxplot!(
    fig,
    rmse_var::ClimaAnalysis.RMSEVariable;
    model_names = ["CliMA"],
    ploc = (1, 1),
    best_and_worst_category_name = "ANN",
)
    # Unit checking
    ClimaAnalysis.Leaderboard._unit_check(rmse_var)

    num_cats = length(rmse_var.category2index)
    num_models = length(rmse_var.model2index)
    # The first stored unit is used for the axis label and the title
    units = first(values(rmse_var.units))

    # Title and labels for x-axis and y-axis
    ax = Makie.Axis(
        fig[ploc...],
        ylabel = "$(rmse_var.short_name) [$units]",
        xticks = (1:num_cats, ClimaAnalysis.category_names(rmse_var)),
        title = "Global RMSE $(rmse_var.short_name) [$units]",
    )

    # Set up for box plot: one category index per RMSE entry. Each category index is
    # repeated `num_models` times so that it lines up with the column-major flattening of
    # `rmse_var.RMSEs` below.
    cats = repeat(1:num_cats, inner = num_models)
    vals = vec(rmse_var.RMSEs)

    # Filter out NaNs because we can't plot with NaNs
    not_nan_indices = findall(!isnan, vals)
    cats = cats[not_nan_indices]
    vals = vals[not_nan_indices]

    # Add box plot
    Makie.boxplot!(
        ax,
        cats,
        vals,
        whiskerwidth = 1,
        width = 0.35,
        mediancolor = :black,
        color = :gray,
        whiskerlinewidth = 1,
    )

    # Plotting best and worst model
    absolute_worst_values, absolute_worst_model_name =
        ClimaAnalysis.find_worst_single_model(
            rmse_var,
            category_name = best_and_worst_category_name,
        )
    absolute_best_values, absolute_best_model_name =
        ClimaAnalysis.find_best_single_model(
            rmse_var,
            category_name = best_and_worst_category_name,
        )
    Makie.scatter!(
        ax,
        1:num_cats,
        absolute_worst_values,
        label = absolute_worst_model_name,
    )
    Makie.scatter!(
        ax,
        1:num_cats,
        absolute_best_values,
        label = absolute_best_model_name,
    )

    # Plotting the median model
    # NOTE(review): drawn with `visible = false`, presumably so that only a "Median" legend
    # entry is contributed (the boxes already show the median line) -- confirm.
    Makie.scatter!(
        ax,
        1:num_cats,
        ClimaAnalysis.median(rmse_var),
        label = "Median",
        color = :black,
        marker = :hline,
        markersize = 10,
        visible = false,
    )

    # Plot CliMA model and other models
    for model_name in model_names
        ClimaAnalysis.Leaderboard._model_name_check(rmse_var, model_name)
        # Only the attributes that differ between CliMA and the other models are chosen
        # here; the other models keep the default marker.
        style =
            model_name == "CliMA" ? (marker = :star5, color = :green) :
            (color = :red,)
        Makie.scatter!(
            ax,
            1:num_cats,
            rmse_var[model_name];
            label = model_name,
            markersize = 20,
            style...,
        )
    end

    # Hack to make legend appear better: a practically invisible point to the right of the
    # last category widens the x-range of the axis
    Makie.axislegend(ax)
    Makie.scatter!(ax, [num_cats + 2.5], [0.1], markersize = 0.01)
end
761+
"""
    Visualize.plot_leaderboard!(fig,
                                rmse_vars::ClimaAnalysis.RMSEVariable...;
                                ploc = (1, 1),
                                model_names = ["CliMA"],
                                best_category_name = "ANN")

Plot a heatmap over the categories and models. The models that appear are the best model
as found for the category `best_category_name` and any other models in `model_names`. The
root mean squared errors for each variable of interest are normalized by dividing by the
median root mean squared error of each variable.

All `rmse_vars` must have the same categories; otherwise, an error is thrown.

The parameter `ploc` determines where to place the plot on the figure. The colorbar is
placed in the column to the right of `ploc`.
"""
function Visualize.plot_leaderboard!(
    fig,
    rmse_vars::ClimaAnalysis.RMSEVariable...;
    ploc = (1, 1),
    model_names = ["CliMA"],
    best_category_name = "ANN",
)
    # Check if rmse_vars all have the same categories
    categories_names = ClimaAnalysis.category_names.(rmse_vars)
    categories_same = length(unique(categories_names)) == 1
    categories_same ||
        error("Categories are not all the same across the RMSEVariable")

    rmse_var = first(rmse_vars)
    categ_names = ClimaAnalysis.category_names(rmse_var)
    num_variables = length(rmse_vars)
    num_boxes = length(categ_names) # number of categories
    num_models = 1 + length(model_names) # best model plus the other models in model_names

    # Initialize variables we need for storing RMSEs for plotting and short names for axis.
    # Along the first dimension, there is one block of `num_boxes` cells per model in
    # `model_names` (in order), followed by one block for the best model.
    rmse_normalized_arr = zeros(num_boxes * num_models, num_variables)
    short_names = String[]

    for (idx, var) in enumerate(reverse(rmse_vars))
        # Get all the short name of the rmse_vars
        push!(short_names, var.short_name)

        # Compute median and best values for RMSE
        med_vals = ClimaAnalysis.median(var)
        best_vals, _ = ClimaAnalysis.find_best_single_model(
            var,
            category_name = best_category_name,
        )

        # Store the normalized values of the models we are interested in, one block per
        # model. Filling block by block also works when `model_names` is empty.
        for (model_idx, model) in enumerate(model_names)
            ClimaAnalysis.Leaderboard._model_name_check(var, model)
            model_rows = (1:num_boxes) .+ num_boxes * (model_idx - 1)
            rmse_normalized_arr[model_rows, idx] = var[model] ./ med_vals
        end

        # The normalized best values go in the last block
        best_rows = (1:num_boxes) .+ num_boxes * length(model_names)
        rmse_normalized_arr[best_rows, idx] = best_vals ./ med_vals
    end

    # Finding the midpoint for placing labels
    start_x_tick = div(num_boxes, 2, RoundUp)

    # One tick per block (every model in model_names plus the best model), so the number
    # of tick positions always matches the number of tick labels
    x_tick_positions =
        [start_x_tick + num_boxes * (block - 1) for block in 1:num_models]

    ax_bottom_and_left = Makie.Axis(
        fig[ploc...],
        yticks = (1:length(short_names), short_names),
        xticks = (x_tick_positions, vcat(model_names, ["Best model"])),
        aspect = num_boxes * num_models,
        xgridvisible = false,
        ygridvisible = false,
    )
    ax_top = Makie.Axis(
        fig[ploc...],
        xaxisposition = :top,
        xticks = (0.5:1.0:length(categ_names), categ_names),
        aspect = num_boxes * num_models,
        xgridvisible = false,
        ygridvisible = false,
    )
    Makie.hidespines!(ax_top)
    Makie.hideydecorations!(ax_top)

    colormap = Makie.Reverse(:RdYlGn)

    # Filter out NaNs here because we need to take the maximum and extrema for the
    # colorrange and limits
    rmse_no_nan_vec = filter(!isnan, vec(rmse_normalized_arr))
    Makie.heatmap!(
        ax_bottom_and_left,
        rmse_normalized_arr,
        colormap = colormap,
        # Trick to exclude the zeros: values below the lower end of the colorrange are
        # drawn in white
        lowclip = :white,
        colorrange = (1e-10, maximum(rmse_no_nan_vec)),
    )
    # Separator line after the block of each model in model_names
    for idx in eachindex(model_names)
        Makie.vlines!(ax_top, num_boxes * idx, color = :black, linewidth = 3.0)
    end
    # The colorbar goes in the column to the right of the heatmap
    row, col = ploc
    col += 1
    Makie.Colorbar(
        fig[row, col],
        limits = extrema(rmse_no_nan_vec),
        label = "RMSE/median(RMSE)",
        colormap = colormap,
    )
end
868+
633869
end

src/Visualize.jl

+4
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,8 @@ function heatmap2D_on_globe! end
3030

3131
function plot_bias_on_globe! end
3232

33+
# Stub for the box plot of an `RMSEVariable`; the method is defined in
# ext/ClimaAnalysisMakieExt.jl.
function plot_boxplot! end
34+
35+
# Stub for the leaderboard heatmap of one or more `RMSEVariable`s; the method is defined in
# ext/ClimaAnalysisMakieExt.jl.
function plot_leaderboard! end
36+
3337
end

0 commit comments

Comments
 (0)