@@ -26,8 +26,8 @@ julia> match_nc_filename("ta_1d_average.nc")
26
26
```
27
27
28
28
```jldoctest
29
- julia> match_nc_filename("pfull_6.0min_max .nc")
30
- ("pfull", "6.0min ", "max")
29
+ julia> match_nc_filename("pfull_6.0m_max .nc")
30
+ ("pfull", "6.0m ", "max")
31
31
```
32
32
33
33
```jldoctest
@@ -39,23 +39,36 @@ function match_nc_filename(filename::String)
39
39
# Let's unpack this regular expression to find files names like "orog_inst.nc" or
40
40
# "ta_3.0h_average.nc" and extract information from there.
41
41
42
- # ^ $: mean match the entire string
43
- # (\w+?): the first capturing group, matching any word non greedily
44
- # _: matches this literal character
45
- # (?>([a-zA-Z0-9\.]*)_)?: an optional group (it doesn't always exist for _inst
46
- # variables) ?> means that we don't want to capture the outside
47
- # group the inside group is any combinations of letters/numbers,
48
- # and the literal character ., followed by the _. We capture the
49
- # combination of characters because that's the reduction
50
- # (\w+): Again, any word
51
- # \.nc: file extension has to be .nc
52
- re = r" ^(\w +?)_(?>([a-zA-Z0-9_\. ]*)_)?(\w *)\. nc$"
42
+ # ^: Matches the beginning of the string
43
+
44
+ # (\w+?): Matches one or more word characters (letters, numbers, or underscore)
45
+ # non-greedily and captures it as the first group (variable name)
46
+
47
+ # _: Matches the underscore separating the variable name and the optional time
48
+ # resolution.
49
+
50
+ # ((?:[0-9]|m|M|d|s|y|h|_|\.)*?): Matches zero or more occurrences of the allowed
51
+ # characters (digits, time units, underscore, or dot) non-greedily and captures the
52
+ # entire time resolution string as the second group
53
+
54
+ # _?: Matches an optional underscore (to handle cases where there's no time resolution)
55
+
56
+ # ([a-zA-Z0-9]+): Matches one or more alphanumeric characters and captures it as the
57
+ # third group (statistic)
58
+
59
+ # \.nc: Matches the literal ".nc" file extension
60
+
61
+ # $: Matches the end of the string
62
+
63
+ re = r" ^(\w +?)_((?:[0-9]|m|M|d|s|y|h|_|\. )*?)_?([a-zA-Z0-9]+)\. nc$"
53
64
m = match (re, filename)
54
65
if ! isnothing (m)
55
66
# m.captures returns `SubString`s (or nothing). We want to have actual `String`s (or
56
- # nothing) so that we can assume we have `String`s everywhere.
67
+ # nothing) so that we can assume we have `String`s everywhere. We also take care of
68
+ # the case where the time period is matched to an empty string and return nothing instead.
57
69
return Tuple (
58
- isnothing (cap) ? nothing : String (cap) for cap in m. captures
70
+ (isnothing (cap) || cap == " " ) ? nothing : String (cap) for
71
+ cap in m. captures
59
72
)
60
73
else
61
74
return nothing
0 commit comments