Skip to content

Commit e18d02a

Browse files
committed
add param min.maf to autoSVD
1 parent ffd8246 commit e18d02a

File tree

7 files changed

+67
-22
lines changed

7 files changed

+67
-22
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ Encoding: UTF-8
22
Package: bigsnpr
33
Type: Package
44
Title: Analysis of Massive SNP Arrays
5-
Version: 1.12.13
6-
Date: 2024-09-04
5+
Version: 1.12.14
6+
Date: 2024-09-10
77
Authors@R: c(
88
person("Florian", "Privé", email = "[email protected]", role = c("aut", "cre")),
99
person("Michael", "Blum", role = "ths"),

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## bigsnpr 1.12.14
2+
3+
- Add a new `min.maf = 0.02` parameter to `snp_autoSVD()` and `bed_autoSVD()`. Then variants are now discarded when they have either a small MAC or a small MAF.
4+
15
## bigsnpr 1.12.13
26

37
- Can now use matrix accessors for class `bed_light` as well.

R/autoSVD.R

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ getIntervals <- function(x, n = 2) {
4545
#' @param alpha.tukey Default is `0.1`. The type-I error rate in outlier
4646
#' detection (that is further corrected for multiple testing).
4747
#' @param min.mac Minimum minor allele count (MAC) for variants to be included.
48-
#' Default is `10`.
48+
#' Default is `10`. Can actually be higher because of `min.maf`.
49+
#' @param min.maf Minimum minor allele frequency (MAF) for variants to be included.
50+
#' Default is `0.02`. Can actually be higher because of `min.mac`.
4951
#' @param max.iter Maximum number of iterations of outlier detection.
5052
#' Default is `5`.
5153
#'
@@ -75,6 +77,7 @@ snp_autoSVD <- function(G,
7577
int.min.size = 20,
7678
alpha.tukey = 0.05,
7779
min.mac = 10,
80+
min.maf = 0.02,
7881
max.iter = 5,
7982
is.size.in.bp = NULL,
8083
ncores = 1,
@@ -90,15 +93,15 @@ snp_autoSVD <- function(G,
9093
# Verbose?
9194
printf2 <- function(...) if (verbose) printf(...)
9295

93-
if (min.mac > 0) {
96+
if (min.mac > 0 && min.maf > 0) {
9497
maf <- snp_MAF(G, ind.row, ind.col, ncores = ncores)
95-
min.maf <- min.mac / (2 * length(ind.row))
96-
mac.nok <- (maf < min.maf)
97-
printf2("Discarding %d variant%s with MAC < %s.\n", sum(mac.nok),
98-
`if`(sum(mac.nok) > 1, "s", ""), min.mac)
99-
ind.keep <- ind.col[!mac.nok]
98+
maf.nok <- (maf < max(min.maf, min.mac / (2 * length(ind.row))))
99+
printf2("Discarding %d variant%s with MAC < %s or MAF < %s.\n",
100+
sum(maf.nok), `if`(sum(maf.nok) > 1, "s", ""), min.mac, min.maf)
101+
ind.keep <- ind.col[!maf.nok]
100102
} else {
101-
stop2("You cannot use variants with no variation; set min.mac > 0.")
103+
stop2("You cannot use variants with no variation; %s",
104+
"set min.mac > 0 and min.maf > 0.")
102105
}
103106

104107
# First clumping
@@ -231,6 +234,7 @@ bed_autoSVD <- function(obj.bed,
231234
int.min.size = 20,
232235
alpha.tukey = 0.05,
233236
min.mac = 10,
237+
min.maf = 0.02,
234238
max.iter = 5,
235239
ncores = 1,
236240
verbose = TRUE) {
@@ -243,14 +247,15 @@ bed_autoSVD <- function(obj.bed,
243247
# Verbose?
244248
printf2 <- function(...) if (verbose) printf(...)
245249

246-
if (min.mac > 0) {
247-
mac <- bed_MAF(obj.bed, ind.row, ind.col, ncores = ncores)$mac
248-
mac.nok <- (mac < min.mac)
249-
printf2("Discarding %d variant%s with MAC < %s.\n", sum(mac.nok),
250-
`if`(sum(mac.nok) > 1, "s", ""), min.mac)
251-
ind.keep <- ind.col[!mac.nok]
250+
if (min.mac > 0 && min.maf > 0) {
251+
info <- bed_MAF(obj.bed, ind.row, ind.col, ncores = ncores)
252+
maf.nok <- (info$mac < min.mac | info$maf < min.maf)
253+
printf2("Discarding %d variant%s with MAC < %s or MAF < %s.\n",
254+
sum(maf.nok), `if`(sum(maf.nok) > 1, "s", ""), min.mac, min.maf)
255+
ind.keep <- ind.col[!maf.nok]
252256
} else {
253-
stop2("You cannot use variants with no variation; set min.mac > 0.")
257+
stop2("You cannot use variants with no variation; %s",
258+
"set min.mac > 0 and min.maf > 0.")
254259
}
255260

256261
# First clumping

docs/news/index.html

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/bed_projectPCA.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/snp_autoSVD.Rd

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-2-autoSVD.R

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ CHR <- test$map$chromosome
1010
POS <- test$map$physical.pos / 10
1111
POS2 <- round(POS + 1)
1212

13+
obj.bed <- bed(snp_writeBed(test, bedfile = tempfile(fileext = ".bed")))
14+
1315
################################################################################
1416

1517
test_that("snp_autoSVD() works", {
@@ -65,8 +67,6 @@ test_that("snp_autoSVD() works", {
6567

6668
test_that("bed_autoSVD() works", {
6769

68-
obj.bed <- bed(snp_writeBed(test, bedfile = tempfile(fileext = ".bed")))
69-
7070
expect_error(bed_autoSVD(obj.bed, min.mac = 0), "no variation; set min.mac > 0")
7171
expect_output(bed_autoSVD(obj.bed, thr.r2 = NA), "Skipping clumping.")
7272

@@ -96,3 +96,28 @@ test_that("bed_autoSVD() works", {
9696
})
9797

9898
################################################################################
99+
100+
test_that("MAC/MAF thresholds work", {
101+
102+
info <- bed_MAF(obj.bed)
103+
104+
replicate(10, {
105+
106+
min.mac <- sample(1:40, size = 1)
107+
min.maf <- runif(n = 1, min = 0.01, max = 0.1)
108+
109+
obj.svd1 <- snp_autoSVD(G, CHR, size = 5, min.mac = min.mac, min.maf = min.maf,
110+
thr.r2 = NA, max.iter = 0, verbose = FALSE)
111+
ind1 <- attr(obj.svd1, "subset")
112+
expect_true(all(info$maf[ind1] >= min.maf))
113+
expect_true(all(info$mac[ind1] >= min.mac))
114+
115+
obj.svd2 <- bed_autoSVD(obj.bed, min.mac = min.mac, min.maf = min.maf,
116+
thr.r2 = NA, max.iter = 0, verbose = FALSE)
117+
ind2 <- attr(obj.svd2, "subset")
118+
expect_true(all(info$maf[ind2] >= min.maf))
119+
expect_true(all(info$mac[ind2] >= min.mac))
120+
})
121+
})
122+
123+
################################################################################

0 commit comments

Comments
 (0)