Skip to content

Commit

Permalink
Bug Fix: Speed up Unit Tests for Bioc checks
Browse files Browse the repository at this point in the history
  • Loading branch information
Al-Murphy committed Oct 26, 2023
1 parent 6b36d67 commit cfd0927
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 288 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: MungeSumstats
Type: Package
Title: Standardise summary statistics from GWAS
Version: 1.11.0
Version: 1.11.1
Authors@R:
c(person(given = "Alan",
family = "Murphy",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## CHANGES IN VERSION 1.11.1

### Bug fix
* Speed up unit tests for Bioconductor checks (predominantly the Linux tests)

## CHANGES IN VERSION 1.9.19

### New features
Expand Down
Binary file added inst/extdata/ALSvcf.vcf.bgz
Binary file not shown.
Binary file added inst/extdata/ALSvcf.vcf.bgz.tbi
Binary file not shown.
142 changes: 74 additions & 68 deletions tests/testthat/test-check_imputation_cols.R
Original file line number Diff line number Diff line change
@@ -1,73 +1,79 @@
test_that("Check that imputation columns added correctly", {
## The following test uses more than 2GB of memory, which is more
## than what 32-bit Windows can handle:
is_32bit_windows <- .Platform$OS.type == "windows" #&&
#.Platform$r_arch == "i386"
if (!is_32bit_windows) {
pth <- system.file("extdata", "eduAttainOkbay.txt",
package = "MungeSumstats"
## The following test uses more than 2GB of memory, which is more
## than what 32-bit Windows can handle:
is_32bit_windows <- .Platform$OS.type == "windows" #&&
#.Platform$r_arch == "i386"
if (!is_32bit_windows) {
pth <- system.file("extdata", "eduAttainOkbay.txt",
package = "MungeSumstats"
)
#skip this section on Linux to speed up the Linux Bioc checks
if (Sys.info()["sysname"]!="Linux"){
eduAttainOkbay <- data.table::fread(pth)
# edit to make an rs id be imputed
eduAttainOkbay[1, "MarkerName"] <-
substring(
eduAttainOkbay[1, "MarkerName"], 3,
nchar(eduAttainOkbay[1, "MarkerName"])
)
eduAttainOkbay <- data.table::fread(pth)
# edit to make an rs id be imputed
eduAttainOkbay[1, "MarkerName"] <-
substring(
eduAttainOkbay[1, "MarkerName"], 3,
nchar(eduAttainOkbay[1, "MarkerName"])
)
# write to temp dir
file <- tempfile()
data.table::fwrite(eduAttainOkbay, file)
# run
reformatted <- MungeSumstats::format_sumstats(file,
ref_genome = "GRCh37",
compute_z = TRUE,
compute_n = 1001,
save_format='LDSC',
imputation_ind = TRUE,
allele_flip_check = TRUE,
dbSNP=144
)
res <- data.table::fread(reformatted)
col_headers <- names(res)
imputat_cols <- c(
col_headers[grepl("^IMPUTATION_", col_headers)],
"flipped"["flipped" %in% col_headers],
col_headers[grepl("^convert_", col_headers)]
)
# just check imputation columns exist
expect_equal(length(imputat_cols) > 0, TRUE)
# also check all have at least 1 value present
have_value <- TRUE
for (col_i in imputat_cols) {
col_i_val <- res[[col_i]]
if (length(col_i_val[!is.na(col_i_val)]) == 0) {
have_value <- FALSE
}
# write to temp dir
file <- tempfile()
data.table::fwrite(eduAttainOkbay, file)
# run
reformatted <- MungeSumstats::format_sumstats(file,
ref_genome = "GRCh37",
compute_z = TRUE,
compute_n = 1001,
save_format='LDSC',
imputation_ind = TRUE,
allele_flip_check = TRUE,
dbSNP=144
)
res <- data.table::fread(reformatted)
col_headers <- names(res)
imputat_cols <- c(
col_headers[grepl("^IMPUTATION_", col_headers)],
"flipped"["flipped" %in% col_headers],
col_headers[grepl("^convert_", col_headers)]
)
# just check imputation columns exist
expect_equal(length(imputat_cols) > 0, TRUE)
# also check all have at least 1 value present
have_value <- TRUE
for (col_i in imputat_cols) {
col_i_val <- res[[col_i]]
if (length(col_i_val[!is.na(col_i_val)]) == 0) {
have_value <- FALSE
}
expect_equal(have_value, TRUE)

# check other compute_n values
eduAttainOkbay <- data.table::fread(pth)
eduAttainOkbay[, N_CON := 100]
eduAttainOkbay[, N_CAS := 120]
# write to temp dir
file <- tempfile()
data.table::fwrite(eduAttainOkbay, file)
methods <- c("ldsc", "sum", "giant", "metal")
reformatted <- MungeSumstats::format_sumstats(file,
ref_genome = "GRCh37",
compute_n = methods,
on_ref_genome = FALSE,
strand_ambig_filter = FALSE,
bi_allelic_filter = FALSE,
allele_flip_check = FALSE,
dbSNP=144
)
res <- data.table::fread(reformatted)
expect_equal(all(paste0("Neff_", c("ldsc", "giant", "metal")) %in%
colnames(res)), TRUE)
} else {
expect_equal(is_32bit_windows, TRUE)
expect_equal(is_32bit_windows, TRUE)
}
expect_equal(have_value, TRUE)
} else{
expect_equal(isTRUE(Sys.info()["sysname"]=="Linux"), TRUE)
expect_equal(isTRUE(Sys.info()["sysname"]=="Linux"), TRUE)
}
# check other compute_n values
eduAttainOkbay <- data.table::fread(pth)
eduAttainOkbay[, N_CON := 100]
eduAttainOkbay[, N_CAS := 120]
# write to temp dir
file <- tempfile()
data.table::fwrite(eduAttainOkbay, file)
methods <- c("ldsc", "sum", "giant", "metal")
reformatted <- MungeSumstats::format_sumstats(file,
ref_genome = "GRCh37",
compute_n = methods,
on_ref_genome = FALSE,
strand_ambig_filter = FALSE,
bi_allelic_filter = FALSE,
allele_flip_check = FALSE,
dbSNP=144
)
res <- data.table::fread(reformatted)
expect_equal(all(paste0("Neff_", c("ldsc", "giant", "metal")) %in%
colnames(res)), TRUE)
} else {
expect_equal(is_32bit_windows, TRUE)
expect_equal(is_32bit_windows, TRUE)
expect_equal(is_32bit_windows, TRUE)
}
})
100 changes: 44 additions & 56 deletions tests/testthat/test-indels.R
Original file line number Diff line number Diff line change
@@ -1,58 +1,46 @@
test_that("non-biallelic SNPs are removed", {
## The following test uses more than 2GB of memory, which is more
## than what 32-bit Windows can handle:
is_32bit_windows <- .Platform$OS.type == "windows" #&&
#.Platform$r_arch == "i386"
if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") {
#test to ensure indels aren't removed
ss_indel <- data.table::data.table("SNP"=c("rs34589910","rs12987662"),
"CHR"=c(4,2),
"BP"=c(6364621,100821548),
"A1"=c("C","A"),
"A2"=c("CG","C"),
"Uniq.a1a2"=c("4:6364621_C_CG","aa"),
"EAF"=c(0.0945334,0.3787),
"BETA"=c(-0.00625732297153778,0.027),
"P"=c(0.4883341,2.693e-24))

reformatted_ss_ad <-
MungeSumstats::format_sumstats(ss_indel,ref_genome="GRCh37",
convert_small_p=TRUE,
allele_flip_check=TRUE,
snp_ids_are_rs_ids=TRUE,
return_data=TRUE,
nThread=2,
on_ref_genome = TRUE,
indels = TRUE,
dbSNP=144)
#SNP ID is an indel so won't exist in our SNP reference dataset
testthat::expect_equal("rs34589910" %in% reformatted_ss_ad$SNP,TRUE)

#check that indel missing RS ID is removed rather than imputing wrong RS ID
ss_indel2 <- data.table::data.table("SNP"=c("4:6364621","2:100821548","rs9320913"),
"CHR"=c(4,2,6),
"BP"=c(6364621,100821548,98584733),
"A1"=c("C","A","A"),
"A2"=c("CG","C","C"),
"Uniq.a1a2"=c("4:6364621_C_CG","aa","bb"),
"EAF"=c(0.0945334,0.3787,0.4567),
"BETA"=c(-0.00625732297153778,0.027,0.0123),
"P"=c(0.4883341,2.693e-24,0.00000021))

reformatted_ss_ad2 <-
MungeSumstats::format_sumstats(ss_indel2,ref_genome="GRCh37",
convert_small_p=TRUE,
allele_flip_check=TRUE,
snp_ids_are_rs_ids=TRUE,
return_data=TRUE,
nThread=2,
on_ref_genome = TRUE,
indels = TRUE,
dbSNP=144)
#make sure it was removed
testthat::expect_equal(nrow(reformatted_ss_ad2)==2,TRUE)
} else {
testthat::expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE)
testthat::expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE)
}
## The following test uses more than 2GB of memory, which is more
## than what 32-bit Windows can handle:
is_32bit_windows <- .Platform$OS.type == "windows" #&&
#.Platform$r_arch == "i386"
if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") {
#test to ensure indels aren't removed
# also test that an indel missing its RS ID is removed rather than imputed with a wrong RS ID
ss_indel <- data.table::data.table("SNP"=c("rs34589910","rs12987662",
"4:6364621"),
"CHR"=c(4,2,4),
"BP"=c(6364621,100821548,6364621),
"A1"=c("C","A","C"),
"A2"=c("CG","C","CG"),
"Uniq.a1a2"=c("4:6364621_C_CG","aa",
"4:6364621_C_CG"),
"EAF"=c(0.0945334,0.3787,0.0945334),
"BETA"=c(-0.00625732297153778,0.027,
-0.00625732297153778),
"P"=c(0.4883341,2.693e-24,0.4883341))

reformatted_ss_ad <-
MungeSumstats::format_sumstats(ss_indel,ref_genome="GRCh37",
convert_small_p=TRUE,
allele_flip_check=TRUE,
snp_ids_are_rs_ids=TRUE,
return_data=TRUE,
nThread=2,
on_ref_genome = TRUE,
indels = TRUE,
log_folder_ind = TRUE,
dbSNP=144)
#SNP ID is an indel so won't exist in our SNP reference dataset
testthat::expect_equal("rs34589910" %in%
reformatted_ss_ad$sumstats$SNP,TRUE)
#check that the indel missing its RS ID is removed rather than imputed with a wrong RS ID
testthat::expect_equal(nrow(fread(
reformatted_ss_ad$log_files$snp_missing_rs)),1)

} else {
testthat::expect_equal((is_32bit_windows||
!Sys.info()["sysname"]=="Linux"), TRUE)
testthat::expect_equal((is_32bit_windows||
!Sys.info()["sysname"]=="Linux"), TRUE)
}
})
Loading

0 comments on commit cfd0927

Please sign in to comment.