-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexamples.R
253 lines (227 loc) · 9.42 KB
/
examples.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#'===============================================================================
#' You will need to have the tidyverse, raw and janitor packages installed. We
#' only use one function from janitor, so don't need to load it into memory.
library(tidyverse)
library(raw)
#'===============================================================================
#' Here we're setting some defaults for ggplot. The sizes are to make the display
#' easier to see in a large conference setting. You may want to omit this for
#' display on a laptop.
old_theme <- theme_set(theme_minimal(base_size = 16))
theme_update(
panel.grid.minor = element_blank()
)
update_geom_defaults("point", list(size = 3))
#'===============================================================================
#' This function will calculate incremental paid amounts, some LDFs, construct
#' factor variables for lag and accident year and set a flag for the upper part
#' of the triangle. Creating factors prevents ggplot from treating the lag and
#' accident years as real numbers. In these examples, we want them to behave
#' like categories.
augment_triangle <- function(tbl_in) {
tbl_in |>
janitor::clean_names() |>
group_by(accident_year) |>
arrange(lag, .by_group = TRUE) |>
mutate(
lag_factor = as.factor(lag),
ay_factor = as.factor(accident_year),
prior_paid = lag(cumulative_paid),
incremental_paid = coalesce(
cumulative_paid - prior_paid,
cumulative_paid
),
ldf_paid = incremental_paid / prior_paid,
upper = development_year <= 1997
) |>
ungroup()
}
tbl_taylor_mcguire <- raw::NJM_WC |>
augment_triangle()
#'===============================================================================
#' Some exploratory visualization. The last one illustrates the model we want to
#' fit. Only 12-24 is shown, but it generalizes to other lags.
tbl_taylor_mcguire |>
filter(upper) |>
ggplot(aes(lag_factor, cumulative_paid, group = ay_factor, color = ay_factor)) +
geom_point() +
geom_line() +
scale_y_continuous(labels = scales::comma) +
labs(
title = "Cumulative paid loss development",
x = "Development lag",
y = "Cumulative paid loss"
) +
theme(legend.position = 'none')
tbl_taylor_mcguire |>
filter(upper, lag <= 2, accident_year < 1997) |>
ggplot(aes(lag_factor, cumulative_paid, group = ay_factor)) +
geom_point() +
geom_line() +
scale_y_continuous(labels = scales::comma) +
labs(
title = "Cumulative paid loss development 12-24 months",
x = "Development lag",
y = "Cumulative paid loss"
) +
theme(legend.position = 'none')
tbl_taylor_mcguire |>
filter(upper, lag == 2) |>
ggplot(aes(prior_paid, cumulative_paid)) +
geom_point() +
scale_y_continuous(labels = scales::comma, limits = c(75e3, 125e3)) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Cumulative paid loss development 12-24 months",
x = "Prior cumulative paid",
y = "Cumulative paid loss"
) +
theme(legend.position = 'none')
tbl_taylor_mcguire |>
filter(upper, lag == 2) |>
ggplot(aes(prior_paid, cumulative_paid)) +
geom_point() +
geom_smooth(formula = y ~ 0 + x, method = lm, se = FALSE) +
scale_y_continuous(labels = scales::comma, limits = c(75e3, 125e3)) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Cumulative paid loss development 12-24 months",
x = "Prior cumulative paid",
y = "Cumulative paid loss"
) +
theme(legend.position = 'none')
#'==============================================================================
#' And here's our model. It's the chain ladder, but it's also a GLM!!!
model_one <- tbl_taylor_mcguire |>
filter(upper, lag > 1) |>
glm(
formula = cumulative_paid ~ 0 + prior_paid:lag_factor,
family = gaussian)
summary(model_one)
## We can also use a Poisson distribution
model_two <- tbl_taylor_mcguire |>
filter(upper, lag > 1) |>
glm(formula = cumulative_paid ~ 0 + lag_factor,
offset = log(prior_paid),
family = poisson)
## It's the volume-weighted average!!
model_two |>
coef() |>
exp()
## And we can use different predicts
model_three <- tbl_taylor_mcguire |>
filter(upper, lag > 1) |>
glm(formula = cumulative_paid ~ 0 + lag_factor,
offset = log(net_ep),
family = poisson)
summary(model_three)
#'==============================================================================
#' Here, we visualize an intercept. It does not appear all that useful for this
#' set of data. (Note that we're using data visualization to posit a model!)
tbl_taylor_mcguire |>
filter(upper, lag >= 2) |>
ggplot(aes(prior_paid, incremental_paid, color = lag_factor)) +
geom_point() +
geom_smooth(method = lm, se = FALSE, linetype = 'dashed') +
geom_smooth(method = lm, se = FALSE, formula = y ~ 0 + x) +
facet_wrap(~ lag_factor, scales = 'free') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Incremental payments against prior cumulative paid",
subtitle = 'Dashed line contains an intercept',
x = "Prior cumulative paid",
y = "Incremental paid loss"
) +
theme(legend.position = 'none')
## Explore another data set
tbl_other <- raw::wkcomp |>
filter(str_starts(Company, 'Allstate')) |>
augment_triangle()
tbl_other |>
filter(upper, lag >= 2) |>
ggplot(aes(prior_paid, incremental_paid, color = lag_factor)) +
geom_point() +
geom_smooth(method = lm, se = FALSE, linetype = 'dashed') +
geom_smooth(method = lm, se = FALSE, formula = y ~ 0 + x) +
facet_wrap(~ lag_factor, scales = 'free') +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Incremental payments against prior cumulative paid",
subtitle = 'Dashed line contains an intercept',
x = "Prior cumulative paid",
y = "Incremental paid loss"
) +
theme(legend.position = 'none')
## Do we really need a separate coefficient for each lag? They kinda look the same
tbl_other |>
filter(upper, lag >= 7) |>
ggplot(aes(prior_paid, incremental_paid)) +
geom_point(aes(color = lag_factor)) +
geom_smooth(method = lm, se = FALSE, linetype = 'dashed') +
geom_smooth(method = lm, se = FALSE, formula = y ~ 0 + x) +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Incremental payments against prior cumulative paid",
subtitle = "Lags 7 and higher, dashed line contains an intercept",
x = "Prior cumulative paid",
y = "Incremental paid loss"
) +
theme(legend.position = 'none')
## Our last model (not shown during the presentation) has a flat/additive factor
## for lags 7 and higher. Might work better than chain ladder for the whole
## triangle!
model_four <- tbl_other |>
filter(upper, lag >= 7) |>
glm(formula = cumulative_paid ~ 1,
family = poisson)
summary(model_four)
#'==============================================================================
#' I had some supplementary material about treating the incremental or cumulative
#' as a response. TL;DR you should use incremental. Hit me up on LinkedIn if
#' you're curious about that. Hint: hypothesis testing will default to having
#' coefficients equal to zero, not one. Another hint: A + B is _always_
#' correlated with B.
tbl_taylor_mcguire |>
filter(upper, lag == 2) |>
ggplot(aes(prior_paid, incremental_paid)) +
geom_point() +
scale_y_continuous(labels = scales::comma, limits = c(30e3, 60e3)) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Incremental paid loss development",
x = "Prior cumulative paid",
y = "Incremental paid loss"
) +
theme(legend.position = 'none')
tbl_taylor_mcguire |>
filter(upper, lag == 2) |>
ggplot(aes(prior_paid, cumulative_paid)) +
geom_point() +
geom_smooth(formula = y ~ 0 + x, method = lm) +
scale_y_continuous(labels = scales::comma, limits = c(75e3, 125e3)) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Cumulative paid loss development",
x = "Prior cumulative paid",
y = "Cumulative paid loss"
) +
theme(legend.position = 'none')
tbl_taylor_mcguire |>
filter(upper, lag == 2) |>
ggplot(aes(prior_paid, incremental_paid)) +
geom_point() +
geom_smooth(formula = y ~ 0 + x, method = lm) +
scale_y_continuous(labels = scales::comma, limits = c(30e3, 60e3)) +
scale_x_continuous(labels = scales::comma) +
labs(
title = "Incremental paid loss development",
x = "Prior cumulative paid",
y = "Incremental paid loss"
) +
theme(legend.position = 'none')