Skip to content

Add confusion matrix to success criteria vignette #81

@LukasWallrich

Description

@LukasWallrich

Something along the lines of this will help with the interpretation of success criteria, though it can probably become much clearer with some tweaking.

Image
Messy draft code

library(tidyverse)
library(FReD)

df <- load_fred_data() %>%
filter(!is.na(es_replication) & !is.na(es_original)) %>%
group_by(id) %>%
mutate(id_unique = paste0(id, "_", row_number())) %>%
ungroup()

criteria <- tribble(
~criterion, ~name,
"significance_r", "Significance\n(replication)",
"significance_agg", "Significance\n(aggregated)",
"consistency_ci", "Consistency\n(confidence interval)",
"consistency_pi", "Consistency\n(prediction interval)",
"homogeneity", "Homogeneity",
"homogeneity_significance", "Homogeneity +\nSignificance",
"small_telescopes", "Small Telescopes"
)

all_results <- lapply(criteria$criterion, function(criterion) {
df %>%
mutate(assess_replication_outcome(es_original, n_original, es_replication, n_replication, criterion = criterion)) %>%
mutate(criterion = criterion)
})

outcomes <- all_results %>%
bind_rows()

out <- outcomes %>%
select(id_unique, criterion, outcome) %>%
pivot_wider(values_from = outcome, names_from = criterion)

long <- out %>%
select(-small_telescopes) %>%
pivot_longer(
cols = -id_unique,
names_to = "criterion",
values_to = "outcome"
)

compute pairwise agreement / disagreement rates

pair_stats <- long %>%
rename(outcome_x = outcome, crit_x = criterion) %>%
inner_join(
long %>% rename(outcome_y = outcome, crit_y = criterion),
by = "id_unique"
) %>%
filter(crit_x != crit_y) %>%
group_by(crit_x, crit_y) %>%
summarize(
agree = mean(outcome_x == outcome_y, na.rm = TRUE),
A_succ_B_fail = mean(outcome_x == "success" & outcome_y == "failure", na.rm = TRUE),
B_succ_A_fail = mean(outcome_x == "failure" & outcome_y == "success", na.rm = TRUE),
.groups = "drop"
)

criteria_plot <- sort(unique(c(pair_stats$crit_x, pair_stats$crit_y)))

plot_df <- pair_stats %>%
mutate(
x = as.numeric(factor(crit_y, levels = criteria_plot)),
y = as.numeric(factor(crit_x, levels = rev(criteria_plot)))
)

upper‐left triangle (predicted‐success/actual‐fail)

tri_ul <- plot_df %>%
rowwise() %>%
mutate(poly = list(tibble(
px = c(x - .5, x + .5, x - .5),
py = c(y + .5, y + .5, y - .5),
val = A_succ_B_fail
))) %>%
unnest(poly)

bottom‐right triangle (predicted‐fail/actual‐success)

tri_br <- plot_df %>%
rowwise() %>%
mutate(poly = list(tibble(
px = c(x + .5, x - .5, x + .5),
py = c(y - .5, y - .5, y + .5),
val = B_succ_A_fail
))) %>%
unnest(poly)

plot_df2 <- plot_df %>% filter(crit_x > crit_y)
tri_ul2 <- tri_ul %>% filter(crit_x > crit_y)
tri_br2 <- tri_br %>% filter(crit_x > crit_y)

library(ggnewscale)
p <- ggplot() +

1) Draw the two bias-triangles FIRST with the "green" scale

geom_polygon(data = tri_ul2,
aes(px, py, group = paste(crit_x, crit_y), fill = A_succ_B_fail),
color = "white") +
geom_polygon(data = tri_br2,
aes(px, py, group = paste(crit_x, crit_y), fill = B_succ_A_fail),
color = "white") +
scale_fill_gradient(low = "white", high = "green", name = "Bias") +

2) Switch to a new fill scale…

new_scale_fill() +

3) …then draw the agreement squares on top

geom_rect(data = plot_df2,
aes(xmin = x - .5, xmax = x,
ymin = y - .5, ymax = y,
fill = agree),
color = "white") +
scale_fill_gradient(low = "grey90", high = "grey10", name = "Agreement") +

4) Finally, add the three text labels per cell

geom_text(data = plot_df2,
aes(x - .25, y + .25,
label = scales::percent(A_succ_B_fail, accuracy = 1)),
size = 2) +
geom_text(data = plot_df2,
aes(x - .25, y - .25,
label = scales::percent(agree, accuracy = 1)),
size = 2) +
geom_text(data = plot_df2,
aes(x + .25, y - .25,
label = scales::percent(B_succ_A_fail, accuracy = 1)),
size = 2) +

5) Axes, fixed aspect, clean theme

coord_fixed() +
scale_x_continuous(breaks = seq_along(criteria_plot), labels = (criteria$name %>% set_names(criteria$criterion))[criteria_plot], expand = c(0, 0)) +
scale_y_continuous(breaks = seq_along(criteria_plot), labels = rev((criteria$name %>% set_names(criteria$criterion))[criteria_plot]), expand = c(0, 0)) +
theme_minimal(base_size = 10) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank()
) +
labs(x = "Success Criterion 1", y = "Success Criterion 2", title = "Comparison of success criteria",
caption = "Note: Squares show overlap, vertical arrows show\nwhen row is more liberal, horizontal arrows when column is more liberal.") +
theme(legend.position = "none") +
# draw full-cell borders
geom_tile(
data = plot_df2,
aes(x = x, y = y),
fill = NA,
color = "black",
linewidth = 0.3
)

p

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions