From c55aaa79a9fd926c0473e7243ec6f96e68c3d888 Mon Sep 17 00:00:00 2001 From: Al-Murphy Date: Fri, 8 Dec 2023 15:27:32 +0000 Subject: [PATCH] update check ldsc n comp --- DESCRIPTION | 2 +- NEWS.md | 6 ++++++ R/check_ldsc_format.R | 32 ++++++++++++++++++++----------- R/sort_coords.R | 44 +++++++++++++++++++++---------------------- man/sort_coords.Rd | 3 --- 5 files changed, 49 insertions(+), 38 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 42ba912..40e8751 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MungeSumstats Type: Package Title: Standardise summary statistics from GWAS -Version: 1.11.1 +Version: 1.11.2 Authors@R: c(person(given = "Alan", family = "Murphy", diff --git a/NEWS.md b/NEWS.md index 2ed4de5..c13f8cf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +## CHANGES IN VERSION 1.11.2 + +### Bug fix +* Remove unused argument `make_ordered` from `sort_coords()` +* Issue fixed with check ldsc format when compute_n type chosen + ## CHANGES IN VERSION 1.11.1 ### Bug fix diff --git a/R/check_ldsc_format.R b/R/check_ldsc_format.R index 58c7e5a..335e3f9 100644 --- a/R/check_ldsc_format.R +++ b/R/check_ldsc_format.R @@ -34,20 +34,30 @@ check_ldsc_format <- function(sumstats_dt, save_format, convert_n_int, message() allele_flip_check <- TRUE } - if (!compute_z && !z_present) { - message("Setting `compute_z=TRUE` to comply with LDSC format.") - compute_z <- TRUE - } n_msg <- paste0( - "LDSC requires an N column but your dataset doesn't ", - "appear to have one. You can impute an N value for ", - "all your SNPs\nby setting `compute_n` to this value but", - " note this is may not be correct and may lead to ", - "different results from LDSC\nthan if the true N per SNP", - " was known." + "LDSC requires an N column but your dataset doesn't ", + "appear to have one. 
You can impute an N value for ", + "all your SNPs\nby setting `compute_n` to this value but", + " note this is may not be correct and may lead to ", + "different results from LDSC\nthan if the true N per SNP", + " was known." ) + if (!z_present && isFALSE(compute_z)) { + beta_se_present <- ("BETA" %in% names(sumstats_dt) && + "SE" %in% names(sumstats_dt)) + p_present <- "P" %in% names(sumstats_dt) + if(beta_se_present){ + message("Setting `compute_z=BETA` to comply with LDSC format.") + compute_z <- "BETA" + }else if(p_present){ + message("Setting `compute_z=P` to comply with LDSC format.") + compute_z <- "P" + } else{ + stop(n_msg) + } + } if (compute_n == 0L && !n_present) { - stop(n_msg) + stop(n_msg) } } return(list( diff --git a/R/sort_coords.R b/R/sort_coords.R index f3847b2..cf55b27 100644 --- a/R/sort_coords.R +++ b/R/sort_coords.R @@ -4,8 +4,6 @@ #' @param sumstats_dt \link[data.table]{data.table} obj of the #' summary statistics file for the GWAS. #' @param sort_coords Whether to sort by coordinates. -#' @param make_ordered Make CHR into an ordered factor to ensure -#' they go from 1-22, X, Y. 
#' @param sort_method Method to sort coordinates by: #' \itemize{ #' \item{"data.table" (default)}{Uses \link[data.table]{setorderv}, @@ -22,26 +20,26 @@ sort_coords <- function(sumstats_dt, sort_coordinates = TRUE, sort_method=c("data.table","GenomicRanges")) { - ### Add this to avoid confusing BiocCheck - CHR <- NULL - - if (isTRUE(sort_coordinates)) { - #### Report #### - sort_method <- sort_method[1] - messager("Sorting coordinates with",paste0(shQuote(sort_method),".")) - ### Double check that X and Y are uppercase - sumstats_dt[, CHR := gsub("x|23", "X", CHR)] - sumstats_dt[, CHR := gsub("y", "Y", CHR)] - sumstats_dt[, CHR := gsub("mt", "MT", CHR)] - #### Sort #### - if(sort_method=="data.table"){ - sumstats_dt <- sort_coords_datatable(sumstats_dt = sumstats_dt) - } else if (sort_method=="GenomicRanges"){ - sumstats_dt <- sort_coord_genomicranges(sumstats_dt = sumstats_dt) - } - ### Now set CHR back to character to avoid issues - # when merging with other dts - sumstats_dt[,CHR:=as.character(CHR)] + ### Add this to avoid confusing BiocCheck + CHR <- NULL + + if (isTRUE(sort_coordinates)) { + #### Report #### + sort_method <- sort_method[1] + messager("Sorting coordinates with",paste0(shQuote(sort_method),".")) + ### Double check that X and Y are uppercase + sumstats_dt[, CHR := gsub("x|23", "X", CHR)] + sumstats_dt[, CHR := gsub("y", "Y", CHR)] + sumstats_dt[, CHR := gsub("mt", "MT", CHR)] + #### Sort #### + if(sort_method=="data.table"){ + sumstats_dt <- sort_coords_datatable(sumstats_dt = sumstats_dt) + } else if (sort_method=="GenomicRanges"){ + sumstats_dt <- sort_coord_genomicranges(sumstats_dt = sumstats_dt) } - return(sumstats_dt) + ### Now set CHR back to character to avoid issues + # when merging with other dts + sumstats_dt[,CHR:=as.character(CHR)] + } + return(sumstats_dt) } diff --git a/man/sort_coords.Rd b/man/sort_coords.Rd index 0715072..0ce376f 100644 --- a/man/sort_coords.Rd +++ b/man/sort_coords.Rd @@ -25,9 +25,6 @@ but much slower than the 
"data.table" method.} }} \item{sort_coords}{Whether to sort by coordinates.} - -\item{make_ordered}{Make CHR into an ordered factor to ensure -they go from 1-22, X, Y.} } \value{ Sorted sumstats_dt