From cfd0927f1ae1c396a93e5a74a26ad0e807d08242 Mon Sep 17 00:00:00 2001 From: Al-Murphy Date: Thu, 26 Oct 2023 11:13:36 +0100 Subject: [PATCH] Bug Fix: Speed up Unit Tests for Bioc checks --- DESCRIPTION | 2 +- NEWS.md | 5 + inst/extdata/ALSvcf.vcf.bgz | Bin 0 -> 5599 bytes inst/extdata/ALSvcf.vcf.bgz.tbi | Bin 0 -> 286 bytes tests/testthat/test-check_imputation_cols.R | 142 ++++++------- tests/testthat/test-indels.R | 100 ++++----- tests/testthat/test-multi_rs_snp_one_row.R | 212 ++++++++++---------- tests/testthat/test-on_ref_genome.R | 114 +++++------ 8 files changed, 287 insertions(+), 288 deletions(-) create mode 100644 inst/extdata/ALSvcf.vcf.bgz create mode 100644 inst/extdata/ALSvcf.vcf.bgz.tbi diff --git a/DESCRIPTION b/DESCRIPTION index 2bc61bc..42ba912 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MungeSumstats Type: Package Title: Standardise summary statistics from GWAS -Version: 1.11.0 +Version: 1.11.1 Authors@R: c(person(given = "Alan", family = "Murphy", diff --git a/NEWS.md b/NEWS.md index cafebdc..2ed4de5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +## CHANGES IN VERSION 1.11.1 + +### Bug fix +* Speed up unit test timing for bioc checks (predominately for linux tests) + ## CHANGES IN VERSION 1.9.19 ### New features diff --git a/inst/extdata/ALSvcf.vcf.bgz b/inst/extdata/ALSvcf.vcf.bgz new file mode 100644 index 0000000000000000000000000000000000000000..fb5de17b9ab8093b3dd9f8d6b5ba26feae7f65bc GIT binary patch literal 5599 zcmV<56(H&#iwFb&00000{{{d;LjnN8742PZZzDI7{_OmU#^&RVY!=_ydp^MDvAqT+ znc1-~ce{tfKqI$p-P)2*k{u+!{)$ztR;!zeTgd|B4+Mf3(v+TuO%{t)#UlIr?{B9M z)$M%oI9?t9bb9`x59N2?e}8`Y;ri_J@xNZaKmK%bb@lFjwVW)bPpj#CcKoN4hlc`x zwW=1&;%U5GRyTk8xA>cnpMN;Hrr(^Lzx(m|@oTj>K6!Wj%TslH{xBb}x+n-R8?UNj z{P0jcRK@M0`oHIDHut{#q5) zay5O7jEm|4VPC3ZHD{)}E%7UEO^@pDuT%Yfw4BTrRq?c#zf5ncn__xfOjpIv<7M%T zofOG#ss+&^=h^xB+39sLp4}8zXV{&ci5^Gn&BiPf@0EQh*SGv zdb^tAKjBl)COj7&$4^hF*JuLfkMzY8{&+fnULweCwWvsrrz#ExeqI&xSw%Bz&bDG! z-SPBYt`^v=!|i9i`WXrS+0VCR(5*+hd8UR=?#Ht`6s`?f^ALx{CC*T^n)Bsy`t>0` z{CHK&=Bo%_#O5L=ek6UOhkuAVyt>EVM7np>q#6~~W4^)%SiZwfc=(0BySRQ=eEPs* zC+};k(rWYL{3hClo5SL7^XFoDKYxC>!PZujf6&jbPcE*$Tz~xX`Ru2&&uEpN#*6V| zg|?9!+E}ui=V*rLCm-<9KgLJL$Nq}l|GcQU^M5$IKB3P4%XslPpH1<1Ke3Hkc8uC3 zYQ|l)=!^mO!b34@EXRF=vNmCk|j zj^osrtrqi#|LO4jJY8Xrb(LwKXIVc$t*kh-47p82zl!bnT^wv_e{&`6F!9s=KC=3^ z`|$k{WoSAO`KB{8z}xt1=pW~+jq?2=9#a1a&6YS@QPDU0xRd*HH6FzJ;}E_Df)Ufz z$q%1CoUv2#@0X+T=;HM1S_oscd$-lG$0}GQ@V|GRHlg9`&&j0pMht(`QT%8=+j?_s zf*1O5{cTjgV=26__*FAEk6ZI9K1@`c=^Z@`ynCo-cdPqjse?4aO6Um4jevp>lv5o6 zwH5$ui9{*U5zrd}0HdX|9Rbq_coD!z+eNUAfCGdO)^!A2BVYn3Z*8}cz7fz?S&i-N z2!xFQNKp8!-HsBCgfd2Hr@M^+Xh{fR4RjwAT_U5QP13a4~1 zy4zc@=}#ez0u{Os3r&Xxp^SE>+g;Ffs0Ro}LXV^AQ0IknQumtKbSRF9^uc#2WYeFh z{n8lOrI1Z`Y7a8>`${&wX+Q~WY`44rYoirLI-ILriob2zh-Nx+7ZcQWkm|!BX0_rF3r|)>3Uoi!&)D1)+`Qz;zX*ZkK9H>9C_+tr1cl%~V>d z-Cy2_Wa}YpVFlQM7&W_p9mXg*dH3t{&mSqxF?-(N4dPp?-YKNte zzAc8!L-GM6v>p?bbFjl|w%dj)b4YisXd72tlXjSMXt&J*^j7(rRBVa)v?V}CqAjN7 zj)@8FM?iDeTZrmWXg@;q4bt5U?NM;pjXOx~mY57X(9D*Y(L3~LOAIwUJy_6w1Q%e3 zHI{ly%!VEM*6v<(gH5t+=)kna&>7jWWXiUf^=3!gI%0OmByDSmcj#xO+qU%xxD4!= zfb9;!6=jF^%GQ&jRzc5MuhCkMfbO5>4v8u0FE`^=g}xbt5r~m=*Ahx0OY9matZ)Iv z?tUIGzpUdh3O2t?=8umUQ6G;+|GUK4^XPH5I$Eygi}77`^t711Am-8IVlu6sj~3Ok z8ZRdIM;M8XF|awhym)^UQ+h`?)yq-!b^3+yr;ktb#R`8spHwl`ebf!QdSAqrj}M7| zG+I=*f1nkAJw1w9p2uo=bbqHj{qOL0{O1Cv=^L#fuy7bgPLJ;F_*OcY{LB4n`7r%DieFp& z8okc1dhAmOr}coXKNq9%c7NStn~v`8qhIatQAX8Eb@Vd19W_rriqEjDR?DNCuglpJ z|Nrm>j5@?8IYimRyI+gZq_})vjOO1)C7M=zEPnl#O3BUmbMZdTt*hr*@o};$q${K> zg)Oxy;(Yt=`_sRC{`kYer;k?$m+ucgpPe6^e7HXN>wlblIN*JugP5s2;8gcPm+3n= z0Aaj3SS*bP;X#Xoi-VJc!-Ke!bZ~NhjP}U*aBy~2UY(U6K9wiuCGvW~ad?kX6_+D720MW0v(4E(+i#|eJ^f3tDM0h|K%*4-8x?E#r@Lk{r z9khVpfZi3ZY)T<|XHdON4XOhmcvr$9M(sL&ilSu;E8~BAe2qa|#Mfx!&;<#IezAp* z8X{BrBT0QcX%9#cJco@eTVR=BAVCmBfO9fn1kg8}I7<^_9BhOZ81~>GTa8|D$+Be& zEE5bPu+AcZ)HWvS4^BH)6inKR8d|G_7LJUk#o(}H>9PUK@UOu;qiApyMz+=&gzro+ z@IFQ0tfjdneDoXuO?HbfvS&N#A$QMzowWr{s00xl^?VWm~hL$rdY9R}qR4AL56pg0q} zawe24TyDTJ{;T*FomUy-d>evvM)X1wfu2<#9~2EJ06E0;W8y>Mas!s}-vB-?Jc#eC zQ^p6^i+7ZeJ^hKsR6CAv$j$Rsd-DIHbh$yx1Vahpg65oYN=t+Ar~d?k8Z8qHB2d-= zCDsH0vh`!m$ePG!THvrJI@?-m&CWLpmm9E*KL8)6LtMuIjMoO^PIRq1W2}QoWxpiQ z(j_U}R|d$^MOkvK(YNqMnHEL!UFO)$p)JKa;A6lDQXg~);=Xc`lcXp}L&QIwL2 zo^}f?6AU7-Kudr0#sUa~_}g)gN+x2IXNyNO&RXYeq*JQgAZ75~;6}u?9!EfcvcB(b zN!3gLMn75y8cmu{5`8O0Y$#lAz%u^p_`yJ2W~2@1!kV6|6)Uuo8(g{-+JgZ>%YKwD z8?X%jP2rvLnzvSzbGU?wMBc{sGG{SaAPnQjnJWawLtc~_V~d$N%q)Dj8DBn zd)(Re=}%s&3h-ozF;*xk%_>=;3~$BNV3}YDftO%uLlk{=P#8A#2iciaycH-1D&kHw zpnswi;iGKX0?P!$2&|x(R|oJx)85;~A0}9XWrDXtKouZ>W)U4prTdi!Ch2?v7@o`lT*GCGeJGrYj*F%|5%hNVqJd0hT|Mou;;-kvT)g8W%S=d*Fuo9 z?!6Jo@@5RK67Zbo7Mil%h(_*ulr1+{8U9k*u@8mRR6r79ASDd+bLbwD<+ctOl9K06$CLFXOP4LUOtA-r5$rtTa0zLH z=$Dd!O?&X1`IF>cU`rLHoT7Bug3AzeZ z9P3jJmI($CSbl)V3XEXqtZ+R~`MyHy7QBfC%It)3l^%m-_}AcZSjfOB zi3^VI=b3ignV`s^0Uc4cLRdaK&UYrP!7~2q_|huAx+*wl4Y)o*|FC`pGE}M(`0hNb zP2dt$$%d^4%LKy+yx}cvCoGw|u7N|Sl!4Rhls6??h0;_o8HLLhT4or^P+ur1<4Xgq zLx$_#NUP90s?X;tX{;SrtlEI_pJGxztzQFXlKn}7uj{sPP4T9GgP4%X{kZy8PjH#1}?Btp>Wwk%M@>mLelLwi={^8@-lJUNv4B- z!Kd3ihx$kuivdfPFB`bb@Rk^Y=c8>>Tkq%={cat)PQ|oJ)Ddib7mt>F)<*Nmwi+!{ zyfF$8T=a{MbkM3_au|ff8oWi0?tbzVXdXaun zf(&(R8)vgby1M9vZwjfwGQ&^?^jH;Njf6HZ;9{EE=}($fF15gU7DrlnA-OUSrOOsv z=6I7F>-RF?JXML6)X3zKAHgqXu=NXKG-@YTW6Dirszj#Pha%>?*?*NLzWlI%{KqHL z=xkwyvV5G;@jI@faM?o36#Gzk73k~(1_eP|lckIeN=s zgO(}wps?tt)vp*+A;{qSb1!`)6rj$Y#hb*maiVo3v;oTuZ->Dt$Bt%@PD-O)|0WBQ zJo!>*MeD*VXFQ+zGE{D|2FnZs8KhF$(MW^T2B&8Kq6rWt5DNUw=Wf_IwF;u$^* zP@`pvy(kQoILP?w1%nZ5e2vC{H#pGC1T-H(r2;t6Qu75@{0@f(Ftfa27Ogqzi?>pQ zWeYH~{0&plQ?6ghb(iO)&^d{(8DHJYe6+pHH(_P1Cixa1A}mdu@^DTWI_AGnIbWrLOB2f+IV-t>poq|f@-RYNKc zExHgUaFUbG`qx+){#AIRc`L^R2VV8Ja%jX8Gs{>`Ueea4Mgz%=PO@#Q;WEem9CR6v z_sZcQrr!USJa7(t3yiB@7rUm^bl$u{%M^Q1U?kz3w=AG`2%ATZGN^%u3oqbw^Wr9Hj);Y}= zBZ0K&i={3j?3aAT)nD;tgq&)Z!ty8s)q6H*nPMLbU++Ka_!d0;)E{lrzZD>^LtfWc zgT>*t7#Z@FFs8m1poYsFZ<2$24?3TYx)Po6)6?$#aLF=IE`K63tGt5sXIr+=GQ}Pg zPJ^L@rAGTH=qzsc>H$dHOT#-{=v#ym4$jWjEvlcpHfm%5)u-ak|cPUP6bFEF;5X-nLaC`svVCe)|5wb%w-k59?kqPKbm`C?Se-JrPqBFw&;h*9IJoDvG6ZY z;NP<#j<@`x?H~0`onA1ku;{U0&+WwS1k}8_Q^1Nti|?_vPuqoq0fiFvM@3fDt2;Vz zi}CUBw7WV!sc~ZynZSFM|MT|SVjOdsfo7QJwO@D`P&mW3ZnI(u3(%JKquzQUGRx$j z_4%x@(7BiJaQ&$U$?^>r4*lF;Hn{P7f7#HqQ&PP5-M7X?7Z0=8<_RQnGBWt;%{^hp Q%)lUz76Q@?KtU(~07vs>ssI20 literal 0 HcmV?d00001 diff --git a/tests/testthat/test-check_imputation_cols.R b/tests/testthat/test-check_imputation_cols.R index 3131dd6..cc901bb 100644 --- a/tests/testthat/test-check_imputation_cols.R +++ b/tests/testthat/test-check_imputation_cols.R @@ -1,73 +1,79 @@ test_that("Check that imputation columns added correctly", { - ## The following test uses more than 2GB of memory, which is more - ## than what 32-bit Windows can handle: - is_32bit_windows <- .Platform$OS.type == "windows" #&& - #.Platform$r_arch == "i386" - if (!is_32bit_windows) { - pth <- system.file("extdata", "eduAttainOkbay.txt", - package = "MungeSumstats" + ## The following test uses more than 2GB of memory, which is more + ## than what 32-bit Windows can handle: + is_32bit_windows <- .Platform$OS.type == "windows" #&& + #.Platform$r_arch == "i386" + if (!is_32bit_windows) { + pth <- system.file("extdata", "eduAttainOkbay.txt", + package = "MungeSumstats" + ) + #only run not on linux to speed up linux bioc checks + if (Sys.info()["sysname"]!="Linux"){ + eduAttainOkbay <- data.table::fread(pth) + # edit to make an rs id be imputed + eduAttainOkbay[1, "MarkerName"] <- + substring( + eduAttainOkbay[1, "MarkerName"], 3, + nchar(eduAttainOkbay[1, "MarkerName"]) ) - eduAttainOkbay <- data.table::fread(pth) - # edit to make an rs id be imputed - eduAttainOkbay[1, "MarkerName"] <- - substring( - eduAttainOkbay[1, "MarkerName"], 3, - nchar(eduAttainOkbay[1, "MarkerName"]) - ) - # write to temp dir - file <- tempfile() - data.table::fwrite(eduAttainOkbay, file) - # run - reformatted <- MungeSumstats::format_sumstats(file, - ref_genome = "GRCh37", - compute_z = TRUE, - compute_n = 1001, - save_format='LDSC', - imputation_ind = TRUE, - allele_flip_check = TRUE, - dbSNP=144 - ) - res <- data.table::fread(reformatted) - col_headers <- names(res) - imputat_cols <- c( - col_headers[grepl("^IMPUTATION_", col_headers)], - "flipped"["flipped" %in% col_headers], - col_headers[grepl("^convert_", col_headers)] - ) - # just check imputation columns exist - expect_equal(length(imputat_cols) > 0, TRUE) - # also check all have at least 1 value present - have_value <- TRUE - for (col_i in imputat_cols) { - col_i_val <- res[[col_i]] - if (length(col_i_val[!is.na(col_i_val)]) == 0) { - have_value <- FALSE - } + # write to temp dir + file <- tempfile() + data.table::fwrite(eduAttainOkbay, file) + # run + reformatted <- MungeSumstats::format_sumstats(file, + ref_genome = "GRCh37", + compute_z = TRUE, + compute_n = 1001, + save_format='LDSC', + imputation_ind = TRUE, + allele_flip_check = TRUE, + dbSNP=144 + ) + res <- data.table::fread(reformatted) + col_headers <- names(res) + imputat_cols <- c( + col_headers[grepl("^IMPUTATION_", col_headers)], + "flipped"["flipped" %in% col_headers], + col_headers[grepl("^convert_", col_headers)] + ) + # just check imputation columns exist + expect_equal(length(imputat_cols) > 0, TRUE) + # also check all have at least 1 value present + have_value <- TRUE + for (col_i in imputat_cols) { + col_i_val <- res[[col_i]] + if (length(col_i_val[!is.na(col_i_val)]) == 0) { + have_value <- FALSE } - expect_equal(have_value, TRUE) - - # check other compute_n values - eduAttainOkbay <- data.table::fread(pth) - eduAttainOkbay[, N_CON := 100] - eduAttainOkbay[, N_CAS := 120] - # write to temp dir - file <- tempfile() - data.table::fwrite(eduAttainOkbay, file) - methods <- c("ldsc", "sum", "giant", "metal") - reformatted <- MungeSumstats::format_sumstats(file, - ref_genome = "GRCh37", - compute_n = methods, - on_ref_genome = FALSE, - strand_ambig_filter = FALSE, - bi_allelic_filter = FALSE, - allele_flip_check = FALSE, - dbSNP=144 - ) - res <- data.table::fread(reformatted) - expect_equal(all(paste0("Neff_", c("ldsc", "giant", "metal")) %in% - colnames(res)), TRUE) - } else { - expect_equal(is_32bit_windows, TRUE) - expect_equal(is_32bit_windows, TRUE) + } + expect_equal(have_value, TRUE) + } else{ + expect_equal(isTRUE(Sys.info()["sysname"]=="Linux"), TRUE) + expect_equal(isTRUE(Sys.info()["sysname"]=="Linux"), TRUE) } + # check other compute_n values + eduAttainOkbay <- data.table::fread(pth) + eduAttainOkbay[, N_CON := 100] + eduAttainOkbay[, N_CAS := 120] + # write to temp dir + file <- tempfile() + data.table::fwrite(eduAttainOkbay, file) + methods <- c("ldsc", "sum", "giant", "metal") + reformatted <- MungeSumstats::format_sumstats(file, + ref_genome = "GRCh37", + compute_n = methods, + on_ref_genome = FALSE, + strand_ambig_filter = FALSE, + bi_allelic_filter = FALSE, + allele_flip_check = FALSE, + dbSNP=144 + ) + res <- data.table::fread(reformatted) + expect_equal(all(paste0("Neff_", c("ldsc", "giant", "metal")) %in% + colnames(res)), TRUE) + } else { + expect_equal(is_32bit_windows, TRUE) + expect_equal(is_32bit_windows, TRUE) + expect_equal(is_32bit_windows, TRUE) + } }) diff --git a/tests/testthat/test-indels.R b/tests/testthat/test-indels.R index 2b9fc73..5867773 100644 --- a/tests/testthat/test-indels.R +++ b/tests/testthat/test-indels.R @@ -1,58 +1,46 @@ test_that("non-biallelic SNPs are removed", { - ## The following test uses more than 2GB of memory, which is more - ## than what 32-bit Windows can handle: - is_32bit_windows <- .Platform$OS.type == "windows" #&& - #.Platform$r_arch == "i386" - if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") { - #test to ensure indels aren't removed - ss_indel <- data.table::data.table("SNP"=c("rs34589910","rs12987662"), - "CHR"=c(4,2), - "BP"=c(6364621,100821548), - "A1"=c("C","A"), - "A2"=c("CG","C"), - "Uniq.a1a2"=c("4:6364621_C_CG","aa"), - "EAF"=c(0.0945334,0.3787), - "BETA"=c(-0.00625732297153778,0.027), - "P"=c(0.4883341,2.693e-24)) - - reformatted_ss_ad <- - MungeSumstats::format_sumstats(ss_indel,ref_genome="GRCh37", - convert_small_p=TRUE, - allele_flip_check=TRUE, - snp_ids_are_rs_ids=TRUE, - return_data=TRUE, - nThread=2, - on_ref_genome = TRUE, - indels = TRUE, - dbSNP=144) - #SNP ID is an indel so won't exist in our SNP reference dataset - testthat::expect_equal("rs34589910" %in% reformatted_ss_ad$SNP,TRUE) - - #check that indel missing RS ID is removed rather than imputing wrong RS ID - ss_indel2 <- data.table::data.table("SNP"=c("4:6364621","2:100821548","rs9320913"), - "CHR"=c(4,2,6), - "BP"=c(6364621,100821548,98584733), - "A1"=c("C","A","A"), - "A2"=c("CG","C","C"), - "Uniq.a1a2"=c("4:6364621_C_CG","aa","bb"), - "EAF"=c(0.0945334,0.3787,0.4567), - "BETA"=c(-0.00625732297153778,0.027,0.0123), - "P"=c(0.4883341,2.693e-24,0.00000021)) - - reformatted_ss_ad2 <- - MungeSumstats::format_sumstats(ss_indel2,ref_genome="GRCh37", - convert_small_p=TRUE, - allele_flip_check=TRUE, - snp_ids_are_rs_ids=TRUE, - return_data=TRUE, - nThread=2, - on_ref_genome = TRUE, - indels = TRUE, - dbSNP=144) - #make sure it was removed - testthat::expect_equal(nrow(reformatted_ss_ad2)==2,TRUE) - } else { - testthat::expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) - testthat::expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) - } + ## The following test uses more than 2GB of memory, which is more + ## than what 32-bit Windows can handle: + is_32bit_windows <- .Platform$OS.type == "windows" #&& + #.Platform$r_arch == "i386" + if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") { + #test to ensure indels aren't removed + # also test indel missing RS ID removed rather than imputing wrong RS ID + ss_indel <- data.table::data.table("SNP"=c("rs34589910","rs12987662", + "4:6364621"), + "CHR"=c(4,2,4), + "BP"=c(6364621,100821548,6364621), + "A1"=c("C","A","C"), + "A2"=c("CG","C","CG"), + "Uniq.a1a2"=c("4:6364621_C_CG","aa", + "4:6364621_C_CG"), + "EAF"=c(0.0945334,0.3787,0.0945334), + "BETA"=c(-0.00625732297153778,0.027, + -0.00625732297153778), + "P"=c(0.4883341,2.693e-24,0.4883341)) + + reformatted_ss_ad <- + MungeSumstats::format_sumstats(ss_indel,ref_genome="GRCh37", + convert_small_p=TRUE, + allele_flip_check=TRUE, + snp_ids_are_rs_ids=TRUE, + return_data=TRUE, + nThread=2, + on_ref_genome = TRUE, + indels = TRUE, + log_folder_ind = TRUE, + dbSNP=144) + #SNP ID is an indel so won't exist in our SNP reference dataset + testthat::expect_equal("rs34589910" %in% + reformatted_ss_ad$sumstats$SNP,TRUE) + #check that indel missing RS ID is removed rather than imputing + testthat::expect_equal(nrow(fread( + reformatted_ss_ad$log_files$snp_missing_rs)),1) + + } else { + testthat::expect_equal((is_32bit_windows|| + !Sys.info()["sysname"]=="Linux"), TRUE) + testthat::expect_equal((is_32bit_windows|| + !Sys.info()["sysname"]=="Linux"), TRUE) + } }) diff --git a/tests/testthat/test-multi_rs_snp_one_row.R b/tests/testthat/test-multi_rs_snp_one_row.R index 30bb596..bb4a598 100644 --- a/tests/testthat/test-multi_rs_snp_one_row.R +++ b/tests/testthat/test-multi_rs_snp_one_row.R @@ -1,109 +1,109 @@ test_that("Handle more than 1 rs IDs in one row", { - ## The following test uses more than 2GB of memory, which is more - ## than what 32-bit Windows can handle: - is_32bit_windows <- .Platform$OS.type == "windows" #&& - #.Platform$r_arch == "i386" - if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") { - file <- tempfile() - # Remove data from line 3 to check it is deleted - eduAttainOkbay <- readLines(system.file("extdata", "eduAttainOkbay.txt", - package = "MungeSumstats" - )) - eduAttainOkbay_missing <- eduAttainOkbay - eduAttainOkbay_missing[3] <- - "rs9320913_rs1234_rs_45678\t6\t98584733\tA\tC\t0.5019\t0.024\t0.003\t2.457e-19" - # write the Educational Attainment GWAS to a temp file for testing - writeLines(eduAttainOkbay_missing, con = file) - - - # make changes for log check - file2 <- tempfile() - # multiple rs id already there - # data already has 8 strand ambiguous snps, 1 snp A1 A2 doesn't match ref gen - # add missing data(EAF) - eduAttainOkbay_missing[2] <- - "rs12987662\t2\t100821548\tA\tC\t\t0.027\t0.003\t2.693e-24" - # make beta 0 - eduAttainOkbay_missing[4] <- - "rs11712056\t3\t49914397\tT\tC\t0.5504\t0\t0.003\t3.304e-19" - # make se negative - eduAttainOkbay_missing[9] <- - "rs2456973\t12\t56416928\tA\tC\t0.6791\t-0.02\t-0.003\t1.064e-12" - # make duplicate SNP IDs - eduAttainOkbay_missing[10] <- - "rs165633\t12\t123767929\tA\tG\t0.2257\t0.023\t0.003\t1.258e-12" - # make duplicate base pair positions - bp from row 14 - eduAttainOkbay_missing[13] <- - "rs11191193\t4\t140764124\tA\tG\t0.6511\t0.018\t0.003\t5.444e-11" - # make snp missing rs but change chr and bp to not real ones so then removed - eduAttainOkbay_missing[15] <- - "11210860\t1\t-1\tA\tG\t0.3694\t0.017\t0.003\t2.359e-10" - # write the Educational Attainment GWAS to a temp file for testing - writeLines(eduAttainOkbay_missing, con = file2) - # Run MungeSumstats code - reformatted <- - MungeSumstats::format_sumstats(file, - ref_genome = "GRCh37", - on_ref_genome = FALSE, - strand_ambig_filter = FALSE, - bi_allelic_filter = FALSE, - allele_flip_check = FALSE, - imputation_ind = TRUE, - remove_multi_rs_snp = FALSE, - dbSNP=144 - ) - reformatted_lines <- data.table::fread(reformatted) - # Should equal org apart from this one line - writeLines(eduAttainOkbay, con = file) - org <- MungeSumstats::format_sumstats(file, - ref_genome = "GRCh37", - on_ref_genome = FALSE, - strand_ambig_filter = FALSE, - bi_allelic_filter = FALSE, - allele_flip_check = FALSE, - imputation_ind = TRUE, - remove_multi_rs_snp = FALSE, - dbSNP=144 - ) - org_lines <- data.table::fread(org) - - # remove imputation column - reformatted_lines[, convert_multi_rs_SNP := NULL] - # reordering makes line 3 got to 58 - testthat::expect_equal(reformatted_lines, org_lines) - - # check log files - # Run MungeSumstats code - reformatted_log <- - MungeSumstats::format_sumstats(file2, - ref_genome = "GRCh37", - on_ref_genome = TRUE, - strand_ambig_filter = TRUE, - bi_allelic_filter = TRUE, - allele_flip_check = TRUE, - imputation_ind = TRUE, - remove_multi_rs_snp = TRUE, - log_folder_ind = TRUE, - dbSNP=144 - ) - - # expect 8 log files - testthat::expect_equal(length(reformatted_log$log_files), 8) - # next check number of rows in each - results <- c() - for (log_i in reformatted_log$log_files) { - data_log_i <- data.table::fread(log_i) - if (grepl("snp_strand_ambiguous", log_i)) { - results <- c(results, nrow(data_log_i) == 8) - } else { - results <- c(results, nrow(data_log_i) == 1| - nrow(data_log_i) == 0) - } - } - expect_equal(all(results), TRUE) - } else { - expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) - expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) - expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) + ## The following test uses more than 2GB of memory, which is more + ## than what 32-bit Windows can handle: + is_32bit_windows <- .Platform$OS.type == "windows" #&& + #.Platform$r_arch == "i386" + if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") { + file <- tempfile() + # Remove data from line 3 to check it is deleted + eduAttainOkbay <- readLines(system.file("extdata", "eduAttainOkbay.txt", + package = "MungeSumstats" + )) + eduAttainOkbay_missing <- eduAttainOkbay + eduAttainOkbay_missing[3] <- + "rs9320913_rs1234_rs_45678\t6\t98584733\tA\tC\t0.5019\t0.024\t0.003\t2.457e-19" + # write the Educational Attainment GWAS to a temp file for testing + writeLines(eduAttainOkbay_missing, con = file) + + + # make changes for log check + file2 <- tempfile() + # multiple rs id already there + # data already has 8 strand ambiguous snps, 1 snp A1 A2 doesn't match ref gen + # add missing data(EAF) + eduAttainOkbay_missing[2] <- + "rs12987662\t2\t100821548\tA\tC\t\t0.027\t0.003\t2.693e-24" + # make beta 0 + eduAttainOkbay_missing[4] <- + "rs11712056\t3\t49914397\tT\tC\t0.5504\t0\t0.003\t3.304e-19" + # make se negative + eduAttainOkbay_missing[9] <- + "rs2456973\t12\t56416928\tA\tC\t0.6791\t-0.02\t-0.003\t1.064e-12" + # make duplicate SNP IDs + eduAttainOkbay_missing[10] <- + "rs165633\t12\t123767929\tA\tG\t0.2257\t0.023\t0.003\t1.258e-12" + # make duplicate base pair positions - bp from row 14 + eduAttainOkbay_missing[13] <- + "rs11191193\t4\t140764124\tA\tG\t0.6511\t0.018\t0.003\t5.444e-11" + # make snp missing rs but change chr and bp to not real ones so then removed + eduAttainOkbay_missing[15] <- + "11210860\t1\t-1\tA\tG\t0.3694\t0.017\t0.003\t2.359e-10" + # write the Educational Attainment GWAS to a temp file for testing + writeLines(eduAttainOkbay_missing, con = file2) + # Run MungeSumstats code + reformatted <- + MungeSumstats::format_sumstats(file, + ref_genome = "GRCh37", + on_ref_genome = FALSE, + strand_ambig_filter = FALSE, + bi_allelic_filter = FALSE, + allele_flip_check = FALSE, + imputation_ind = TRUE, + remove_multi_rs_snp = FALSE, + dbSNP=144 + ) + reformatted_lines <- data.table::fread(reformatted) + # Should equal org apart from this one line + writeLines(eduAttainOkbay, con = file) + org <- MungeSumstats::format_sumstats(file, + ref_genome = "GRCh37", + on_ref_genome = FALSE, + strand_ambig_filter = FALSE, + bi_allelic_filter = FALSE, + allele_flip_check = FALSE, + imputation_ind = TRUE, + remove_multi_rs_snp = FALSE, + dbSNP=144 + ) + org_lines <- data.table::fread(org) + + # remove imputation column + reformatted_lines[, convert_multi_rs_SNP := NULL] + # reordering makes line 3 got to 58 + testthat::expect_equal(reformatted_lines, org_lines) + + # check log files + # Run MungeSumstats code + reformatted_log <- + MungeSumstats::format_sumstats(file2, + ref_genome = "GRCh37", + on_ref_genome = FALSE,#TRUE, + strand_ambig_filter = TRUE, + bi_allelic_filter = FALSE,#TRUE, + allele_flip_check = FALSE,#TRUE, + imputation_ind = TRUE, + remove_multi_rs_snp = TRUE, + log_folder_ind = TRUE, + dbSNP=144 + ) + + # expect 5 log files + testthat::expect_equal(length(reformatted_log$log_files), 5) + # next check number of rows in each + results <- c() + for (log_i in reformatted_log$log_files) { + data_log_i <- data.table::fread(log_i) + if (grepl("snp_strand_ambiguous", log_i)) { + results <- c(results, nrow(data_log_i) == 8) + } else { + results <- c(results, nrow(data_log_i) == 1| + nrow(data_log_i) == 0) + } } + expect_equal(all(results), TRUE) + } else { + expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) + expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) + expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) + } }) \ No newline at end of file diff --git a/tests/testthat/test-on_ref_genome.R b/tests/testthat/test-on_ref_genome.R index e827470..a2489db 100644 --- a/tests/testthat/test-on_ref_genome.R +++ b/tests/testthat/test-on_ref_genome.R @@ -1,59 +1,59 @@ test_that("SNPs not on reference genome are removed", { - ## The following test uses more than 2GB of memory, which is more - ## than what 32-bit Windows can handle: - is_32bit_windows <- .Platform$OS.type == "windows" #&& - #.Platform$r_arch == "i386" - if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") { - file <- tempfile() - # Update ID from line 3 to check it is deleted - - # "rs79925071" is not on ref genome GRCh37 - eduAttainOkbay <- readLines(system.file("extdata", "eduAttainOkbay.txt", - package = "MungeSumstats" - )) - eduAttainOkbay_missing <- eduAttainOkbay - eduAttainOkbay_missing[3] <- - "rs79925071\t6\t98584733\tA\tC\t0.5019\t0.024\t0.003\t2.457e-19" - # write the Educational Attainment GWAS to a temp file for testing - writeLines(eduAttainOkbay_missing, con = file) - # Run MungeSumstats code - reformatted <- MungeSumstats::format_sumstats(file, - ref_genome = "GRCh37", - on_ref_genome = TRUE, - strand_ambig_filter = FALSE, - bi_allelic_filter = FALSE, - allele_flip_check = FALSE, - log_folder_ind = TRUE, - dbSNP=144 - ) - reformatted_lines <- readLines(reformatted$sumstats) - # Should equal org apart from this one line - writeLines(eduAttainOkbay, con = file) - org <- MungeSumstats::format_sumstats(file, - ref_genome = "GRCh37", - on_ref_genome = TRUE, - strand_ambig_filter = FALSE, - bi_allelic_filter = FALSE, - allele_flip_check = FALSE, - dbSNP=144 - ) - org_lines <- readLines(org) - # reordering in function, line 3 rs9320913 is now 58 - # expect_equal(setequal(reformatted_lines,org_lines[-58]),TRUE) - expect_equal(setequal(reformatted_lines, org_lines), TRUE) - - # also check get genome builds works - eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt", - package = "MungeSumstats" - ) - sumstats_list <- list(ss1 = eduAttainOkbayPth) - ref_genomes <- get_genome_builds( - sumstats_list = sumstats_list, - sampled_snps = 50, - dbSNP = 144 - ) - expect_equal(all.equal(ref_genomes, list("ss1" = "GRCH37")), TRUE) - } else { - expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) - expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) - } + ## The following test uses more than 2GB of memory, which is more + ## than what 32-bit Windows can handle: + is_32bit_windows <- .Platform$OS.type == "windows" #&& + #.Platform$r_arch == "i386" + if (!is_32bit_windows && Sys.info()["sysname"]=="Linux") { + file <- tempfile() + # Update ID from line 3 to check it is deleted - + # "rs79925071" is not on ref genome GRCh37 + eduAttainOkbay <- readLines(system.file("extdata", "eduAttainOkbay.txt", + package = "MungeSumstats" + )) + eduAttainOkbay_missing <- eduAttainOkbay + eduAttainOkbay_missing[3] <- + "rs79925071\t6\t98584733\tA\tC\t0.5019\t0.024\t0.003\t2.457e-19" + # write the Educational Attainment GWAS to a temp file for testing + writeLines(eduAttainOkbay_missing, con = file) + # Run MungeSumstats code + reformatted <- MungeSumstats::format_sumstats(file, + ref_genome = "GRCh37", + on_ref_genome = TRUE, + strand_ambig_filter = FALSE, + bi_allelic_filter = FALSE, + allele_flip_check = FALSE, + log_folder_ind = TRUE, + infer_eff_direction = FALSE, + dbSNP=144 + ) + reformatted_lines <- data.table::fread(reformatted$sumstats) + # Should equal org apart from this one line + writeLines(eduAttainOkbay, con = file) + org <- MungeSumstats::format_sumstats(file, + ref_genome = "GRCh37", + on_ref_genome = FALSE,#TRUE, for speed + strand_ambig_filter = FALSE, + bi_allelic_filter = FALSE, + allele_flip_check = FALSE, + infer_eff_direction = FALSE, + dbSNP=144 + ) + org_lines <- data.table::fread(org) + #test + expect_equal(all.equal(reformatted_lines, org_lines), TRUE) + # also check get genome builds works + eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt", + package = "MungeSumstats" + ) + sumstats_list <- list(ss1 = eduAttainOkbayPth) + ref_genomes <- get_genome_builds( + sumstats_list = sumstats_list, + sampled_snps = 50, + dbSNP = 144 + ) + expect_equal(all.equal(ref_genomes, list("ss1" = "GRCH37")), TRUE) + } else { + expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) + expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE) + } })