-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummarise_accuracies.R
91 lines (74 loc) · 2.62 KB
/
summarise_accuracies.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#
# mtDNA results 100 rep
# Accuracy by animal category
#
library(tidyverse)
# holder dataset
df = NULL
# list directories in folder (replicates)
folders = list.dirs()
# remove ./
folders = folders[-1]
# iterate over replicates folders
for(folder in folders){
# list all files in folder for the maxQTL scenario
files = list.files(path=folder,pattern = "*maxQTL_\\d*.RData",full.names = "TRUE")
# iterate over files
for(file in files){
# load file and summarise category results
load(file)
# filter the last year of breeding cycle,
# select col Category, Scenario, IIds and nEbv
# add replicate col (REP)
# bring nTbv from Records table
tmp = catSummary %>%
filter(Breeding_year == 40) %>%
select(Category, Scenario, IIds, nEbv) %>%
mutate(REP = rep) %>%
unnest_longer(c(IIds, nEbv))%>%
mutate(nTbv = Records$gv_corr[match(IIds, Records$IId)]) %>%
select(-IIds)
# combine to holer dataset
df = rbind(df, tmp)
}
}
# save results
write_csv(df, "category_sum.csv")
# select only STANDARD and BASELINE scenarios
# calculate accuray by category/scenario/rep
df2 = df %>%
filter(Scenario %in% c("GENmtbaseline",
"GENstdbaseline",
"PEDmtbaseline",
"PEDstdbaseline")) %>%
group_by(Category, Scenario, REP) %>%
summarise(acc = cor(nEbv, nTbv))
#write_csv(df2, "acc_cat_100.csv")
#df = read_csv("~/Desktop/eddie_mount/data/acc_cat_100.csv")
#head(df)
# Split the Scenario variable into PROGRAM and SCENARIO
# rename proven_bulls and young_bulls from GS Program
df2 = df %>% mutate(
Program = ifelse(Scenario %in% c("GENmtbaseline", "GENstdbaseline"),
"GS", "PT"),
Scenario = ifelse(Scenario %in% c("GENstdbaseline", "PEDstdbaseline"),
"STANDARD", "BASELINE"),
Category = ifelse(Category %in% c("proven_bulls") & Program %in% c("GS"),
"proven_bulls_geno",
ifelse(Category %in% c("young_bulls") & Program %in% c("GS"),
"young_bulls_geno", Category)))
# calculate mean/sd for replicates and diff in means
df3 = df2 %>%
group_by(Category, Program, REP) %>%
mutate(diff = lag(acc, default = first(acc)) - acc) %>%
ungroup()
df3 = df3 %>%
group_by(Category, Scenario, Program) %>%
summarise(mean_acc = mean(acc),
sd_acc = sd(acc),
mean_diff = mean(diff),
sd_diff = sd(diff),
.groups = "keep")
# write final table to file
write_csv(df3,
"~/Desktop/mDNA_jds_submission/review/final_acc_100r.csv")