@@ -42,22 +42,29 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
42
42
call. = FALSE )
43
43
44
44
if (ncol(LifeHistData ) < 3 )
45
- stop(" LifeHistData must have at least 3 columns: ID - Sex - BirthYear" )
45
+ stop(" LifeHistData must have at least 3 columns: ID - Sex - BirthYear (or be NULL) " )
46
46
47
47
48
48
# check IDs (column 1) ---
49
49
colnames(LifeHistData )[1 ] <- ' ID'
50
50
LifeHistData $ ID <- as.character(LifeHistData $ ID )
51
51
if (any(grepl(" " , LifeHistData $ ID ))) {
52
- stop(" LifeHistData IDs (column 1) must not include spaces" , call. = FALSE )
52
+ NotOK <- LifeHistData $ ID [grepl(" " , LifeHistData $ ID )]
53
+ cli :: cli_alert_danger(c(" LifeHistData IDs (column 1) contains {length(NotOK)}" ,
54
+ " IDs that include spaces:" ))
55
+ cli :: cli_li(paste0(" '" , NotOK [1 : min(length(NotOK ), 3 )], " '" )) # quotation marks to see leading/trailing blanks
56
+ if (length(NotOK ) > 3 ) cli :: cli_li(" ..." )
57
+ stop(" IDs may only contain alphanumerical characters and underscore" , call. = FALSE )
53
58
}
54
59
if (! all(is.na(gID ))) {
55
60
gID <- as.character(gID )
56
- if (length(intersect(LifeHistData $ ID , gID ))== 0 )
61
+ if (length(intersect(LifeHistData $ ID , gID ))== 0 ) {
62
+ cli :: cli_alert_danger(" Incorrect LifeHistData object, or incorrect format" )
57
63
stop(" None of the IDs in LifeHistData column 1 match the rownames of GenoM" ,
58
64
call. = FALSE )
65
+ }
59
66
} else {
60
- if (sorted ) stop(" if sorted=TRUE, gID cannot be NA " , call. = FALSE )
67
+ if (sorted ) stop(" if sorted=TRUE, gID (ordered IDs in GenoM) must be provided " , call. = FALSE )
61
68
}
62
69
63
70
@@ -75,8 +82,9 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
75
82
grepl(' max' , LH_colnames ), 5 ),
76
83
' Year.last' = which_else(grepl(' last' , LH_colnames ), 6 ) )
77
84
if (any(duplicated(colnum ))) {
78
- stop(" Confused about LifeHistData column order; Please provide data as " ,
79
- " ID - Sex - BirthYear - BY.min - BY.max - Year.last" , call. = FALSE )
85
+ cli :: cli_alert_danger(" Column order and/or column names in LifeHistData is unclear" )
86
+ stop(" Please provide LifeHistData as " ,
87
+ " ID - Sex - BirthYear (- BY.min - BY.max - Year.last)" , call. = FALSE )
80
88
}
81
89
82
90
# optional columns
@@ -86,7 +94,6 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
86
94
}
87
95
}
88
96
89
-
90
97
# column renaming & order ---
91
98
for (x in seq_along(colnum )) {
92
99
colnames(LifeHistData )[ colnum [x ] ] <- names(colnum )[x ]
@@ -96,16 +103,26 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
96
103
97
104
# check Sex ----
98
105
if (! all(LifeHistData $ Sex %in% 1 : 4 )) {
106
+ prop_sex_OK <- sum(LifeHistData $ Sex %in% c(1 : 4 ,NA )) / nrow(LifeHistData )
99
107
sex_msg <- " LifeHistData column 'Sex' should be coded as 1=female, 2=male, 3/<NA>=unknown, 4=hermaphrodite"
100
- if (sum(LifeHistData $ Sex %in% 1 : 4 ) < nrow(LifeHistData )/ 10 ) { # <10% valid non-missing coding
101
- stop(sex_msg , call. = FALSE )
108
+ if (prop_sex_OK < 0.1 ) {
109
+ # recode from F/M/.. to 1/2/3?
110
+ if (sum(LifeHistData $ Sex %in% c(' f' ,' F' ,' m' ,' M' ))/ nrow(LifeHistData ) > 0.1 ) {
111
+ cli :: cli_alert_warning(" Recoding `LifeHistData` column `Sex` from F/M/.. to 1=female, 2=male, 3=unknown" )
112
+ LifeHistData $ Sex [LifeHistData $ Sex %in% c(' f' ,' F' )] <- 1
113
+ LifeHistData $ Sex [LifeHistData $ Sex %in% c(' m' ,' M' )] <- 2
114
+ } else {
115
+ cli :: cli_alert_danger(c(' {(round((1-prop_sex_OK)*100)}% of entries' ,
116
+ " in `LifeHistData` column `Sex` have an unrecognised coding" ))
117
+ stop(sex_msg , call. = FALSE )
118
+ }
102
119
} else {
103
- if (! all(LifeHistData $ Sex %in% c(1 , 2 , 3 , 4 ,NA ))) {
104
- warning( sex_msg , " \n These values are converted to <NA>/3: " , setdiff( LifeHistData $ Sex , 1 : 4 ),
105
- call. = FALSE , immediate. = TRUE )
120
+ if (! all(LifeHistData $ Sex %in% c(1 : 4 ,NA ))) {
121
+ cli :: cli_alert_warning(c( sex_msg , " \n The following values are converted to 3=unknown: " ))
122
+ cli :: cli_li(setdiff( LifeHistData $ Sex , c( 1 : 4 , NA )) )
106
123
}
107
- LifeHistData $ Sex [! LifeHistData $ Sex %in% c(1 : 4 )] <- 3
108
124
}
125
+ LifeHistData $ Sex [! LifeHistData $ Sex %in% c(1 : 4 )] <- 3
109
126
}
110
127
111
128
@@ -118,8 +135,8 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
118
135
for (x in c(" BirthYear" , " BY.min" , " BY.max" , ' Year.last' )) {
119
136
IsInt <- check.integer(LifeHistData [,x ])
120
137
if (any(! IsInt , na.rm = TRUE )) {
121
- warning (" In LifeHistData column " , x , " , these values are converted to <NA>/-999: " ,
122
- unique(LifeHistData [,x ][IsInt %in% FALSE ]), immediate. = TRUE , call. = FALSE )
138
+ cli :: cli_alert_warning (" In ` LifeHistData` column `{x}`, the following values are converted to <NA>/-999: " )
139
+ cli :: cli_li( unique(LifeHistData [,x ][IsInt %in% FALSE ]))
123
140
}
124
141
LifeHistData [, x ] <- ifelse(IsInt , suppressWarnings(as.integer(as.character(LifeHistData [, x ]))), NA )
125
142
LifeHistData [is.na(LifeHistData [,x ]), x ] <- - 999
@@ -142,7 +159,7 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
142
159
LHdup [, c(' ID' , intersect(col_order , colnames(LHdup )))],
143
160
stringsAsFactors = FALSE )
144
161
}
145
- message (" duplicate IDs found in lifehistory data , first entry will be used" )
162
+ cli :: cli_alert_warning (" duplicate IDs found in `LifeHistData` , first entry will be used" )
146
163
LifeHistData <- LifeHistData [! duplicated(LifeHistData $ ID ), ]
147
164
}
148
165
if (returnDups & ! all(is.na(gID ))) {
0 commit comments