From 8df9b70f18cd37d7d801ace7abb1b375adde2aa9 Mon Sep 17 00:00:00 2001 From: Al-Murphy Date: Thu, 13 Jul 2023 09:01:53 +0100 Subject: [PATCH] Add support for alternative chromosome styles --- R/format_sumstats.R | 12 ++++++++++-- R/validate_parameters.R | 15 ++++++++++++++- man/format_sumstats.Rd | 7 ++++++- man/import_sumstats.Rd | 3 +++ man/validate_parameters.Rd | 7 ++++++- 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/R/format_sumstats.R b/R/format_sumstats.R index 86e7bf3..2294c12 100644 --- a/R/format_sumstats.R +++ b/R/format_sumstats.R @@ -119,6 +119,9 @@ #' Ensembl styles both code chromosomes as `1-22, X, Y, MT`; the UCSC style is #' `chr1-chr22, chrX, chrY, chrM`; and the dbSNP style is #' `ch1-ch22, chX, chY, chMT`. Default is Ensembl. +#' @param rmv_chrPrefix Is now deprecated, do not use. Use chr_style instead - +#' chr_style = 'Ensembl' will give the same result as rmv_chrPrefix=TRUE used to +#' give. #' @param rmv_chr Chromosomes to exclude from the formatted summary statistics #' file. Use NULL if no filtering is necessary. Default is `c("X", "Y", "MT")` #' which removes all non-autosomal SNPs. 
@@ -267,7 +270,10 @@ format_sumstats <- function(path, log_folder = tempdir(), imputation_ind = FALSE, force_new = FALSE, - mapping_file = sumstatsColHeaders) { + mapping_file = sumstatsColHeaders, + #deprecated parameters + rmv_chrPrefix = NULL + ) { #### Setup multi-threading #### data.table::setDTthreads(threads = nThread) #### Setup empty variables #### @@ -346,7 +352,9 @@ format_sumstats <- function(path, log_mungesumstats_msgs = log_mungesumstats_msgs, mapping_file = mapping_file, tabix_index = tabix_index, - chain_source = chain_source + chain_source = chain_source, + #deprecated parameters + rmv_chrPrefix = rmv_chrPrefix ) # save messages to file if user specified diff --git a/R/validate_parameters.R b/R/validate_parameters.R index 019e84b..0d9686e 100644 --- a/R/validate_parameters.R +++ b/R/validate_parameters.R @@ -43,7 +43,9 @@ validate_parameters <- function(path, log_mungesumstats_msgs, mapping_file, tabix_index, - chain_source) { + chain_source, + #deprecated parameters + rmv_chrPrefix) { # Checking if the file exists should happen first - # can pass dt/df of sumstats pth_msg <- paste0( @@ -395,4 +397,15 @@ validate_parameters <- function(path, collapse = "\n")) stop(tbx_msg) } + + #deprecated parameters + if (!is.null(rmv_chrPrefix)) { + dep_msg <- paste0( + "The parameter rmv_chrPrefix is now deprecated, please use chr_style ", + "instead.\nThe default of rmv_chrPrefix = True will give the same ", + "result as using chr_style = 'Ensembl'." 
+ ) + stop(dep_msg) + } + rmv_chrPrefix = rmv_chrPrefix } diff --git a/man/format_sumstats.Rd b/man/format_sumstats.Rd index a763c4a..7e1ab9a 100644 --- a/man/format_sumstats.Rd +++ b/man/format_sumstats.Rd @@ -57,7 +57,8 @@ format_sumstats( log_folder = tempdir(), imputation_ind = FALSE, force_new = FALSE, - mapping_file = sumstatsColHeaders + mapping_file = sumstatsColHeaders, + rmv_chrPrefix = NULL ) } \arguments{ @@ -288,6 +289,10 @@ However, if a column header that is in youf file is missing of the mapping we give is incorrect you can supply your own mapping file. Must be a 2 column dataframe with column names "Uncorrected" and "Corrected". See data(sumstatsColHeaders) for default mapping and necessary format.} + +\item{rmv_chrPrefix}{Is now deprecated, do not use. Use chr_style instead - +chr_style = 'Ensembl' will give the same result as rmv_chrPrefix=TRUE used to +give.} } \value{ The address for the modified sumstats file or the actual data diff --git a/man/import_sumstats.Rd b/man/import_sumstats.Rd index 9285bdd..4539c5c 100644 --- a/man/import_sumstats.Rd +++ b/man/import_sumstats.Rd @@ -143,6 +143,9 @@ statistics file ("NCBI", "UCSC", "dbSNP", or "Ensembl"). The NCBI and Ensembl styles both code chromosomes as \verb{1-22, X, Y, MT}; the UCSC style is \verb{chr1-chr22, chrX, chrY, chrM}; and the dbSNP style is \verb{ch1-ch22, chX, chY, chMT}. Default is Ensembl.} + \item{\code{rmv_chrPrefix}}{Is now deprecated, do not use. Use chr_style instead - +chr_style = 'Ensembl' will give the same result as rmv_chrPrefix=TRUE used to +give.} \item{\code{rmv_chr}}{Chromosomes to exclude from the formatted summary statistics file. Use NULL if no filtering is necessary. 
Default is \code{c("X", "Y", "MT")} which removes all non-autosomal SNPs.} diff --git a/man/validate_parameters.Rd b/man/validate_parameters.Rd index d9cd1c4..ecf7a79 100644 --- a/man/validate_parameters.Rd +++ b/man/validate_parameters.Rd @@ -45,7 +45,8 @@ validate_parameters( log_mungesumstats_msgs, mapping_file, tabix_index, - chain_source + chain_source, + rmv_chrPrefix ) } \arguments{ @@ -219,6 +220,10 @@ data(sumstatsColHeaders) for default mapping and necessary format.} \item{chain_source}{source of the chain file to use in liftover, if converting genome build ("ucsc" or "ensembl"). Note that the UCSC chain files require a license for commercial use. The Ensembl chain is used by default ("ensembl").} + +\item{rmv_chrPrefix}{Is now deprecated, do not use. Use chr_style instead - +chr_style = 'Ensembl' will give the same result as rmv_chrPrefix=TRUE used to +give.} } \value{ No return