@@ -6,7 +6,8 @@ import NaNStatistics: nanmedian
6
6
export RMSEVariable,
7
7
model_names,
8
8
category_names,
9
- rmse_units
9
+ rmse_units,
10
+ read_rmses
10
11
11
12
"""
12
13
Holding root mean squared errors over multiple categories and models for a single
@@ -236,4 +237,71 @@ Return all the unit of the models in `rmse_var`.
236
237
"""
237
238
function rmse_units(rmse_var::RMSEVariable)
    # Hand back the `units` field of the variable unchanged
    return rmse_var.units
end
238
239
"""
    read_rmses(csv_file::String, short_name::String; units = nothing)

Read a CSV file and create a RMSEVariable with the `short_name` of the variable.

The format of the CSV file should have a header consisting of the entry "model_name" (or any
other text as it is ignored by the function) and the rest of the entries should be the
category names. Each row after the header should start with the model name, followed by the
root mean squared error of that model for each category. The entries of the CSV file should
be separated by commas. Lines that are empty or contain only whitespace are skipped.

The parameter `units` can be a dictionary mapping model name to unit or a string. If `units`
is a string, then units will be the same across all models. If units is `nothing`, then the
unit is missing for each model which is denoted by an empty string.

If the file contains no model rows, the RMSEs passed to `RMSEVariable` form a matrix with
zero rows and one column per category.

An error is thrown if a non-blank row does not have exactly as many entries as the header,
or if an RMSE entry cannot be parsed as a `Float64`.
"""
function read_rmses(csv_file::String, short_name::String; units = nothing)
    # Everything read from the file is returned from the `do` block instead of being
    # assigned to variables of the enclosing function, so no captured variable is
    # reassigned inside the closure
    model_names, category_names, model_rmse_vec = open(csv_file, "r") do io
        # Get categories (e.g. DJF, MAM, JJA, SON, ANN)
        categories = String.(split(readline(io), ','))

        # Get rid of the first column name which is the column named "model_name"
        popfirst!(categories)

        models = String[]
        rmse_rows = Vector{Float64}[]

        # Process each line; `line_num + 1` is the line number in the file because the
        # header occupies the first line
        for (line_num, line) in enumerate(eachline(io))
            # Tolerate blank lines (e.g. extra newlines at the end of the file)
            all(isspace, line) && continue

            fields = split(line, ',')

            # Check if any entry is missing in the CSV file
            length(fields) != length(categories) + 1 &&
                error("Missing RMSEs for line $(line_num + 1) in CSV file")

            # The first entry is the model name and the rest of the row is the RMSE for
            # each category
            push!(models, fields[1])
            push!(rmse_rows, [parse(Float64, field) for field in @view fields[2:end]])
        end

        (models, categories, rmse_rows)
    end

    # One row per model and one column per category. `stack` does not accept an empty
    # collection, so the matrix for a file without model rows is built explicitly
    model_rmses = if isempty(model_rmse_vec)
        zeros(Float64, 0, length(category_names))
    else
        stack(model_rmse_vec, dims = 1)
    end

    # Expand `nothing` (no unit, denoted by "") or a single string into a dictionary
    # mapping every model name to its unit; anything else is passed through unchanged
    model_units = if isnothing(units) || units isa AbstractString
        unit = something(units, "")
        Dict{String, String}(model_name => unit for model_name in model_names)
    else
        units
    end

    return RMSEVariable(short_name, model_names, category_names, model_rmses, model_units)
end
306
+
239
307
end
0 commit comments