Skip to content

Commit d627f36

Browse files
committed
update to v2.11
1 parent bb9f9cb commit d627f36

32 files changed

+1102
-626
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata
5+
src/*.o
6+
src/*.so
7+
src/*.dll

DESCRIPTION

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: sequoia
22
Type: Package
33
Title: Pedigree Inference from SNPs
4-
Version: 2.9.1
5-
Date: 2024-04-24
4+
Version: 2.11
5+
Date: 2024-05-26
66
Authors@R: c(person("Jisca", "Huisman", email = "[email protected]",
77
role = c("aut", "cre")))
88
Author: Jisca Huisman [aut, cre]
@@ -17,7 +17,8 @@ Imports:
1717
plyr (>= 1.8.0),
1818
stats,
1919
utils,
20-
graphics
20+
graphics,
21+
cli
2122
RoxygenNote: 7.3.1
2223
Suggests:
2324
openxlsx,
@@ -32,3 +33,4 @@ Suggests:
3233
VignetteBuilder: knitr, R.rsp
3334
NeedsCompilation: yes
3435
Depends: R (>= 3.5.0)
36+
SystemRequirements: Fortran95

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# sequoia 2.11
2+
- improved assignment rate when some birth years are unknown
3+
- improved messages, with {cli} markup
4+
- a few minor bugfixes in the Fortran code
5+
16
# sequoia 2.9.1
27
- upgrade of `GenoConvert`: new vcf and genlight input, various bugs fixed,
38
behaviour made more consistent and clearer through additional messages.

R/CalcOHLLR.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ CalcOHLLR <- function(Pedigree = NULL,
165165
if (x %in% names(SeqList)) {
166166
if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next
167167
if (x == "AgePrior" & AgePrior == FALSE) next
168-
if (!quiet) message("using ", x, " in SeqList")
168+
if (!quiet) cli::cli_alert_info("using {x} in SeqList")
169169
assign(NewName[x], SeqList[[x]])
170170
}
171171
}

R/CalcPairLL.R

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,22 @@ CalcPairLL <- function(Pairs = NULL,
229229
if (x %in% names(SeqList)) {
230230
if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next
231231
if (x == "AgePriors" & AgePrior == FALSE) next
232-
if (!quiet) message("using ", x, " in SeqList")
232+
if (!quiet) cli::cli_alert_info("using {x} in SeqList")
233233
assign(NewName[x], SeqList[[x]])
234234
}
235235
}
236236
}
237237

238238
Ped <- PedPolish(Pedigree, gID, DropNonSNPd=FALSE, NullOK = TRUE)
239239
# Ped=NULL if Pedigree=NULL & gID=NULL
240+
if (!quiet) {
241+
if (is.null(Ped)) {
242+
cli::cli_alert_info("Not conditioning on any pedigree")
243+
} else {
244+
N <- c(i=nrow(Ped), d=sum(!is.na(Ped$dam)), s=sum(!is.na(Ped$sire)))
245+
cli::cli_alert_info("Conditioning on pedigree with {N['i']} individuals, {N['d']} dams and {N['s']} sires")
246+
}
247+
}
240248

241249
LHF <- CheckLH(LifeHistData, rownames(GenoM), sorted=TRUE)
242250
LHF$Sex[LHF$Sex==4] <- 3
@@ -278,7 +286,7 @@ CalcPairLL <- function(Pairs = NULL,
278286

279287
# Specs / param ----
280288
if ("Specs" %in% names(SeqList)) {
281-
if(!quiet) message("settings in SeqList$Specs will overrule input parameters")
289+
if(!quiet) cli::cli_alert_info("settings in SeqList$Specs will overrule input parameters")
282290
PARAM <- SpecsToParam(SeqList$Specs,
283291
ErrM=SeqList$ErrM, ErrFlavour=ErrFlavour,
284292
dimGeno = dim(GenoM), quiet, Plot) # overrule values in SeqList
@@ -370,7 +378,7 @@ CalcPairLL <- function(Pairs = NULL,
370378
# plot ----
371379
if (Plot) {
372380
if (nrow(Pairs) > 1e4) {
373-
message("Plot not shown when >10.000 pairs")
381+
cli::cli_alert_info("Plot not shown when >10.000 pairs")
374382
} else {
375383
PlotPairLL(Pairs.OUT)
376384
}
@@ -410,7 +418,7 @@ FortifyPairs <- function(Pairs, # pairs with character IDs etc
410418
if (ncol(Pairs)==2) {
411419
colnames(Pairs) <- c("ID1", "ID2")
412420
} else if (!all(c("ID1", "ID2") %in% names(Pairs))) {
413-
warning("Assuming columns 1 and 2 of Pairs are 'ID1' and 'ID2'")
421+
cli::cli_alert_warning("Assuming columns 1 and 2 of Pairs are 'ID1' and 'ID2'")
414422
colnames(Pairs)[1:2] <- c("ID1", "ID2")
415423
}
416424
for (x in c("ID1", "ID2")) Pairs[,x] <- as.character(Pairs[,x])

R/CalcRped.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ CalcRped <- function(Pedigree, OUT="DF")
1818
stop("'OUT' must be 'M' (matrix) or 'DF' (data.frame)")
1919

2020
if (!requireNamespace("kinship2", quietly = TRUE)) {
21-
if (interactive()) message("Installing pkg 'kinship2'... ")
21+
if (interactive()) cli::cli_alert_info("Installing package {.pkg kinship2}... ")
2222
utils::install.packages("kinship2")
2323
}
2424

R/CheckGeno.R

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,6 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE,
110110
}
111111
}
112112

113-
warn <- function(...)
114-
warning(paste(strwrap(paste(...), prefix=" "), "\n"), # strwrap() destroys whitespace
115-
call. = FALSE, immediate.=TRUE)
116-
117-
118113
# Exclude low quality SNPs ----
119114
Excl <- list()
120115
sstats <- SnpStats(GenoM, Plot = Plot)
@@ -125,24 +120,24 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE,
125120
SNPs.TooMuchMissing <- sstats[,"Mis"] >= 1 - 1/nrow(GenoM) # genotyped for 0 or 1 indiv --> no use
126121
}
127122
if (any(SNPs.TooMuchMissing)) {
128-
warn("There are ", sum(SNPs.TooMuchMissing),
129-
" SNPs scored for ", ifelse(Strict, "<5% of", "0 or 1"), " individuals, these will be excluded")
123+
cli::cli_alert_warning(c("There are {sum(SNPs.TooMuchMissing)}",
124+
" SNPs scored for {ifelse(Strict, '<5% of', '0 or 1')} individuals, these will be excluded"))
130125
GenoM <- GenoM[, !SNPs.TooMuchMissing]
131126
Excl[["ExcludedSnps"]] <- which(SNPs.TooMuchMissing)
132127
sstats <- SnpStats(GenoM, Plot = FALSE) # update SnpStats
133128
}
134129

135130
SNPs.mono <- sstats[,"AF"] <= 1/(2*nrow(GenoM)) | sstats[,"AF"] >= 1 - 1/(2*nrow(GenoM))
136131
if (any(SNPs.mono)) {
137-
warn("There are ", sum(SNPs.mono)," monomorphic (fixed) SNPs, these will be excluded")
132+
cli::cli_alert_warning("There are {sum(SNPs.mono)} monomorphic (fixed) SNPs, these will be excluded")
138133
GenoM <- GenoM[, !SNPs.mono]
139134
Excl[["ExcludedSnps-mono"]] <- which(SNPs.mono)
140135
}
141136

142137
SNPs.LotMissing <- apply(GenoM, 2, function(x) sum(x>=0)) < nrow(GenoM)/2
143138
if (any(SNPs.LotMissing)) {
144-
warn(ifelse("ExcludedSnps" %in% names(Excl), "In addition, there", "There"),
145-
" are ", sum(SNPs.LotMissing)," SNPs scored for <50% of individuals")
139+
cli::cli_alert_warning(c('{ifelse("ExcludedSnps" %in% names(Excl), "In addition, there", "There")}',
140+
" are {sum(SNPs.LotMissing)} SNPs scored for <50% of individuals"))
146141
Excl[["Snps-LowCallRate"]] <- which(SNPs.LotMissing)
147142
}
148143

@@ -155,30 +150,30 @@ CheckGeno <- function(GenoM, quiet=FALSE, Plot=FALSE,
155150
Indiv.TooMuchMissing <- Lscored <= 1 # genotyped for 0 or 1 SNPs --> no point
156151
}
157152
if (any(Indiv.TooMuchMissing)) {
158-
warn("\n *********** \n There are ", sum(Indiv.TooMuchMissing),
159-
" individuals scored for ", ifelse(Strict, "<5% of", "0 or 1"), " SNPs, ",
160-
"these WILL BE IGNORED \n ***********")
153+
cli::cli_alert_danger(c("There are {.strong {sum(Indiv.TooMuchMissing)} individuals}",
154+
' scored for {ifelse(Strict, "<5% of", "0 or 1")} SNPs, ',
155+
"these {.strong WILL BE IGNORED}"))
161156
Excl[["ExcludedIndiv"]] <- rownames(GenoM)[which(Indiv.TooMuchMissing)]
162157
GenoM <- GenoM[!Indiv.TooMuchMissing, ]
163158
}
164159

165160
Indiv.LotMissing <- apply(GenoM, 1, function(x) sum(x>=0)) < ncol(GenoM)/5
166161
if (any(Indiv.LotMissing)) {
167-
warn(ifelse("ExcludedIndiv" %in% names(Excl), "In addition, there", "There"),
168-
" are ", sum(Indiv.LotMissing)," individuals scored for <20% of SNPs,",
169-
"it is advised to treat their assignments with caution")
162+
cli::cli_alert_danger(c('{ifelse("ExcludedIndiv" %in% names(Excl), "In addition, there", "There")}',
163+
" are {sum(Indiv.LotMissing)} individuals scored for <20% of SNPs,",
164+
"it is advised to treat their assignments with caution"))
170165
Excl[["Indiv-LowCallRate"]] <- rownames(GenoM)[Indiv.LotMissing]
171166
}
172167

173168

174169
if (!quiet) {
175170
MSG <- paste("There are ", nrow(GenoM), " individuals and ", ncol(GenoM), " SNPs.\n")
176171
if (any(c("ExcludedSnps", "ExcludedSnps-mono", "ExcludedIndiv") %in% names(Excl))) {
177-
message("After exclusion, ", MSG)
172+
cli::cli_alert_info(c("After exclusion, ", MSG))
178173
} else if (length(Excl)>0) {
179-
message(MSG)
174+
cli::cli_alert_info(MSG)
180175
} else {
181-
message("Genotype matrix looks OK! ", MSG)
176+
cli::cli_alert_success(c("Genotype matrix looks OK! ", MSG))
182177
}
183178
}
184179

R/CheckLifeHist.R

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,29 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
4242
call.=FALSE)
4343

4444
if (ncol(LifeHistData) < 3)
45-
stop("LifeHistData must have at least 3 columns: ID - Sex - BirthYear")
45+
stop("LifeHistData must have at least 3 columns: ID - Sex - BirthYear (or be NULL)")
4646

4747

4848
# check IDs (column 1) ---
4949
colnames(LifeHistData)[1] <- 'ID'
5050
LifeHistData$ID <- as.character(LifeHistData$ID)
5151
if (any(grepl(" ", LifeHistData$ID))) {
52-
stop("LifeHistData IDs (column 1) must not include spaces", call.=FALSE)
52+
NotOK <- LifeHistData$ID[grepl(" ", LifeHistData$ID)]
53+
cli::cli_alert_danger(c("LifeHistData IDs (column 1) contains {length(NotOK)}",
54+
" IDs that include spaces:"))
55+
cli::cli_li(paste0("'", NotOK[1:min(length(NotOK), 3)], "'")) # quotation marks to see leading/trailing blanks
56+
if (length(NotOK) > 3) cli::cli_li("...")
57+
stop("IDs may only contain alphanumerical characters and underscore", call.=FALSE)
5358
}
5459
if (!all(is.na(gID))) {
5560
gID <- as.character(gID)
56-
if (length(intersect(LifeHistData$ID, gID))==0)
61+
if (length(intersect(LifeHistData$ID, gID))==0) {
62+
cli::cli_alert_danger("Incorrect LifeHistData object, or incorrect format")
5763
stop("None of the IDs in LifeHistData column 1 match the rownames of GenoM",
5864
call.=FALSE)
65+
}
5966
} else {
60-
if (sorted) stop("if sorted=TRUE, gID cannot be NA", call.=FALSE)
67+
if (sorted) stop("if sorted=TRUE, gID (ordered IDs in GenoM) must be provided", call.=FALSE)
6168
}
6269

6370

@@ -75,8 +82,9 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
7582
grepl('max', LH_colnames), 5),
7683
'Year.last' = which_else(grepl('last', LH_colnames), 6) )
7784
if (any(duplicated(colnum))) {
78-
stop("Confused about LifeHistData column order; Please provide data as ",
79-
"ID - Sex - BirthYear - BY.min - BY.max - Year.last", call.=FALSE)
85+
cli::cli_alert_danger("Column order and/or column names in LifeHistData is unclear")
86+
stop("Please provide LifeHistData as ",
87+
"ID - Sex - BirthYear (- BY.min - BY.max - Year.last)", call.=FALSE)
8088
}
8189

8290
# optional columns
@@ -86,7 +94,6 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
8694
}
8795
}
8896

89-
9097
# column renaming & order ---
9198
for (x in seq_along(colnum)) {
9299
colnames(LifeHistData)[ colnum[x] ] <- names(colnum)[x]
@@ -96,16 +103,26 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
96103

97104
# check Sex ----
98105
if (!all(LifeHistData$Sex %in% 1:4)) {
106+
prop_sex_OK <- sum(LifeHistData$Sex %in% c(1:4,NA)) / nrow(LifeHistData)
99107
sex_msg <- "LifeHistData column 'Sex' should be coded as 1=female, 2=male, 3/<NA>=unknown, 4=hermaphrodite"
100-
if (sum(LifeHistData$Sex %in% 1:4) < nrow(LifeHistData)/10) { # <10% valid non-missing coding
101-
stop(sex_msg, call.=FALSE)
108+
if (prop_sex_OK < 0.1) {
109+
# recode from F/M/.. to 1/2/3?
110+
if (sum(LifeHistData$Sex %in% c('f','F','m','M'))/nrow(LifeHistData) > 0.1) {
111+
cli::cli_alert_warning("Recoding `LifeHistData` column `Sex` from F/M/.. to 1=female, 2=male, 3=unknown")
112+
LifeHistData$Sex[LifeHistData$Sex %in% c('f','F')] <- 1
113+
LifeHistData$Sex[LifeHistData$Sex %in% c('m','M')] <- 2
114+
} else {
115+
cli::cli_alert_danger(c('{(round((1-prop_sex_OK)*100)}% of entries',
116+
" in `LifeHistData` column `Sex` have an unrecognised coding"))
117+
stop(sex_msg, call.=FALSE)
118+
}
102119
} else {
103-
if (!all(LifeHistData$Sex %in% c(1,2,3,4,NA))) {
104-
warning(sex_msg, "\n These values are converted to <NA>/3: ", setdiff(LifeHistData$Sex, 1:4),
105-
call.=FALSE, immediate.=TRUE)
120+
if (!all(LifeHistData$Sex %in% c(1:4,NA))) {
121+
cli::cli_alert_warning(c(sex_msg, "\nThe following values are converted to 3=unknown:"))
122+
cli::cli_li(setdiff(LifeHistData$Sex, c(1:4,NA)))
106123
}
107-
LifeHistData$Sex[!LifeHistData$Sex %in% c(1:4)] <- 3
108124
}
125+
LifeHistData$Sex[!LifeHistData$Sex %in% c(1:4)] <- 3
109126
}
110127

111128

@@ -118,8 +135,8 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
118135
for (x in c("BirthYear", "BY.min", "BY.max", 'Year.last')) {
119136
IsInt <- check.integer(LifeHistData[,x])
120137
if (any(!IsInt, na.rm=TRUE)) {
121-
warning("In LifeHistData column ", x, ", these values are converted to <NA>/-999: ",
122-
unique(LifeHistData[,x][IsInt %in% FALSE]), immediate.=TRUE, call.=FALSE)
138+
cli::cli_alert_warning("In `LifeHistData` column `{x}`, the following values are converted to <NA>/-999: ")
139+
cli::cli_li(unique(LifeHistData[,x][IsInt %in% FALSE]))
123140
}
124141
LifeHistData[, x] <- ifelse(IsInt, suppressWarnings(as.integer(as.character(LifeHistData[, x]))), NA)
125142
LifeHistData[is.na(LifeHistData[,x]), x] <- -999
@@ -142,7 +159,7 @@ CheckLH <- function(LifeHistData, gID = NA, sorted=TRUE, returnDups = FALSE)
142159
LHdup[, c('ID', intersect(col_order, colnames(LHdup)))],
143160
stringsAsFactors = FALSE)
144161
}
145-
message("duplicate IDs found in lifehistory data, first entry will be used")
162+
cli::cli_alert_warning("duplicate IDs found in `LifeHistData`, first entry will be used")
146163
LifeHistData <- LifeHistData[!duplicated(LifeHistData$ID), ]
147164
}
148165
if (returnDups & !all(is.na(gID))) {

R/ConfProb.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@
7070
#' @param nCores number of computer cores to use. If \code{>1}, package
7171
#' \pkg{parallel} is used. Set to NULL to use all but one of the available
7272
#' cores, as detected by \code{parallel::detectCores()} (using all cores tends
73-
#' to freeze up your computer).
73+
#' to freeze up your computer). With large datasets, the amount of computer
74+
#' memory may be the limiting factor for the number of cores you can use.
7475
#' @param quiet suppress messages. \code{TRUE} runs \code{SimGeno} and
7576
#' \code{sequoia} quietly, \code{'very'} also suppresses other messages and
7677
#' the iteration counter when \code{nCores=1} (there is no iteration counter
@@ -206,7 +207,7 @@ EstConf <- function(Pedigree = NULL,
206207

207208
# check input ----
208209
if (is.null(Pedigree)) stop("Please provide Pedigree")
209-
if (is.null(LifeHistData)) warning("Running without LifeHistData")
210+
if (is.null(LifeHistData)) cli::cli_alert_warning("Running without `LifeHistData`")
210211
if (!is.null(args.sim) & !is.list(args.sim)) stop("args.sim should be a list or NULL")
211212
if (!is.null(args.seq) & !is.list(args.seq)) stop("args.seq should be a list or NULL")
212213
if (!is.wholenumber(nSim) || nSim<1 || length(nSim)>1)
@@ -238,9 +239,9 @@ EstConf <- function(Pedigree = NULL,
238239

239240
if (!quiet.EC) {
240241
if (ParSib == "par") {
241-
message("Simulating parentage assignment only ...")
242+
cli::cli_alert_info("Simulating parentage assignment only ...")
242243
} else {
243-
message("Simulating full pedigree reconstruction ...")
244+
cli::cli_alert_info("Simulating full pedigree reconstruction ...")
244245
}
245246
}
246247

@@ -252,7 +253,7 @@ EstConf <- function(Pedigree = NULL,
252253
if (is.null(nCores) || nCores>1) {
253254
if (!requireNamespace("parallel", quietly = TRUE)) {
254255
if (interactive() & !quiet.EC) {
255-
message("Installing pkg 'parallel' to speed things up... ")
256+
cli::cli_alert_info("Installing package {.pkg parallel} to speed things up... ")
256257
}
257258
utils::install.packages("parallel")
258259
}
@@ -261,13 +262,12 @@ EstConf <- function(Pedigree = NULL,
261262
nCores <- maxCores -1
262263
} else if (nCores > maxCores) {
263264
nCores <- maxCores
264-
warning("Reducing 'nCores' to ", maxCores, ", as that's all you have",
265-
immediate.=TRUE)
265+
cli::cli_alert_warning("Reducing {.code nCores} to {maxCores}, as that's all you have")
266266
}
267267
if (nCores > nSim) {
268268
nCores <- nSim
269269
}
270-
if (!quiet.EC) message("Using ", nCores, " out of ", maxCores, " cores")
270+
if (!quiet.EC) cli::cli_alert_info("Using {nCores} out of {maxCores} cores")
271271
}
272272

273273

R/DuplicateCheck.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,12 @@ DuplicateCheck <- function(GenoM = NULL,
8282

8383
# print warnings
8484
if (!quiet) {
85-
if (any(duplicated(gID))) warning("duplicate IDs found in genotype data, please remove to avoid confusion",
86-
immediate. = TRUE)
85+
if (any(duplicated(gID))) cli::cli_alert_danger(c("duplicate IDs found in `GenoM`, ",
86+
"please remove to avoid confusion"))
8787
if (DUP$ndupgenos>0 && DUP$ndupgenos > sum(duplicated(gID))) {
88-
message("There were ", ifelse(DUP$ndupgenos == Ng, ">=", ""),
89-
DUP$ndupgenos, " likely duplicate genotypes found, consider removing")
88+
cli::cli_alert_info(c('Found {ifelse(DUP$ndupgenos == Ng, ">=", "")}',
89+
"{DUP$ndupgenos} likely duplicate genotypes,",
90+
"please double check and consider removing"), wrap=TRUE)
9091
}
9192
}
9293

0 commit comments

Comments
 (0)