Skip to content

Commit

Permalink
enable local chain files
Browse files Browse the repository at this point in the history
  • Loading branch information
Al-Murphy committed Apr 24, 2024
1 parent bf83eaa commit 16afb54
Show file tree
Hide file tree
Showing 12 changed files with 104 additions and 5 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: MungeSumstats
Type: Package
Title: Standardise summary statistics from GWAS
Version: 1.11.9
Version: 1.11.10
Authors@R:
c(person(given = "Alan",
family = "Murphy",
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## CHANGES IN VERSION 1.11.10

### New features
* Can now pass local chain files for liftover (`local_chain` in
`format_sumstats()` and `liftover()`).

## CHANGES IN VERSION 1.11.9

### New features
Expand Down
10 changes: 9 additions & 1 deletion R/format_sumstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
#' @param chain_source source of the chain file to use in liftover, if converting
#' genome build ("ucsc" or "ensembl"). Note that the UCSC chain files require a
#' license for commercial use. The Ensembl chain is used by default ("ensembl").
#' @param local_chain Path to local chain file to use instead of downlaoding.
#' Default of NULL i.e. no local file to use. NOTE if passing a local chain file
#' make sure to specify the path to convert from and to the correct build like
#' GRCh37 to GRCh38. We can not sense check this for local files. The chain file
#' can be submitted as a gz file (as downloaed from source) or unzipped.
#' @param convert_small_p Binary, should non-negative
#' p-values <= 5e-324 be converted to 0?
#' Small p-values pass the R limit and can cause errors with LDSC/MAGMA and
Expand Down Expand Up @@ -239,6 +244,7 @@ format_sumstats <- function(path,
ref_genome = NULL,
convert_ref_genome = NULL,
chain_source = "ensembl",
local_chain = NULL,
convert_small_p = TRUE,
convert_large_p = TRUE,
convert_neg_p = TRUE,
Expand Down Expand Up @@ -377,6 +383,7 @@ format_sumstats <- function(path,
mapping_file = mapping_file,
tabix_index = tabix_index,
chain_source = chain_source,
local_chain = local_chain,
drop_na_cols = drop_na_cols,
#deprecated parameters
rmv_chrPrefix = rmv_chrPrefix
Expand Down Expand Up @@ -1047,7 +1054,8 @@ format_sumstats <- function(path,
convert_ref_genome = convert_ref_genome,
ref_genome = ref_genome,
imputation_ind = imputation_ind,
chain_source = chain_source
chain_source = chain_source,
local_chain = local_chain
)
#update ref genome of data
if(!is.null(convert_ref_genome))
Expand Down
25 changes: 23 additions & 2 deletions R/liftover.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ liftover <- function(sumstats_dt,
end_col = start_col,
as_granges = FALSE,
style = "NCBI",
local_chain = NULL,
verbose = TRUE) {

IMPUTATION_gen_build <- width <- strand <- end <- seqnames <- NULL;
Expand All @@ -63,6 +64,14 @@ liftover <- function(sumstats_dt,
"ensembl")){
stop(chain_msg)
}
#check local chain file
if (!is.null(local_chain) && !file.exists(local_chain)){
lcl_chain_msg <- paste0(
"The local_chain parameter is invalid, please chose a valid path to a ",
"local chain file or leave as NULL to download a chain file."
)
stop(lcl_chain_msg)
}
#Only continue with checks if user specifies a genome to convert to
if (!is.null(convert_ref_genome)){
#### Map genome build synonyms ####
Expand Down Expand Up @@ -110,12 +119,24 @@ liftover <- function(sumstats_dt,
end.field = end_col
)
#### Specify chain file ####
chain <- get_chain_file(
if (is.null(local_chain)) {
if (verbose) {message("Downloading chain file...")}
chain <- get_chain_file(
from = query_ucsc,
to = target_ucsc,
chain_source = chain_source,
verbose = verbose
)
)
}
else {
if (verbose) {message("Using local chain file...")}
#unzip if necessary
filetype = summary(file(local_chain))$class
if(filetype=='gzfile'){
local_chain <- R.utils::gunzip(local_chain, overwrite=TRUE)
}
chain <- rtracklayer::import.chain(local_chain)
}
#### Liftover ####
gr_lifted <- unlist(rtracklayer::liftOver(
x = gr,
Expand Down
9 changes: 9 additions & 0 deletions R/validate_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ validate_parameters <- function(path,
mapping_file,
tabix_index,
chain_source,
local_chain,
drop_na_cols,
#deprecated parameters
rmv_chrPrefix) {
Expand Down Expand Up @@ -90,6 +91,14 @@ validate_parameters <- function(path,
"ensembl")){
stop(chain_msg)
}
#check local chain file
if (!is.null(local_chain) && !file.exists(local_chain)){
lcl_chain_msg <- paste0(
"The local_chain parameter is invalid, please chose a valid path to a ",
"local chain file or leave as NULL to download a chain file."
)
stop(lcl_chain_msg)
}
#check dbSNP version
avail_dbSNP <- c(144,155)
dbSNP_msg <- paste0(
Expand Down
1 change: 1 addition & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -143,5 +143,6 @@ development:
* [Kitty Murphy](https://github.com/KittyMurphy)
* [Mykhaylo Malakhov](https://github.com/MykMal)
* [Alasdair Warwick](https://github.com/rmgpanw)
* [Ao Lu](https://github.com/leoarrow1)

# References
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
<!-- badges: start -->

[![](https://img.shields.io/badge/release%20version-1.10.1-black.svg)](https://www.bioconductor.org/packages/MungeSumstats)
[![](https://img.shields.io/badge/devel%20version-1.11.9-black.svg)](https://github.com/neurogenomics/MungeSumstats)
[![](https://img.shields.io/badge/devel%20version-1.11.10-black.svg)](https://github.com/neurogenomics/MungeSumstats)
[![R build
status](https://github.com/neurogenomics/MungeSumstats/workflows/rworkflows/badge.svg)](https://github.com/neurogenomics/MungeSumstats/actions)
[![](https://img.shields.io/github/last-commit/neurogenomics/MungeSumstats.svg)](https://github.com/neurogenomics/MungeSumstats/commits/master)
Expand Down Expand Up @@ -151,6 +151,7 @@ We would like to acknowledge all those who have contributed to
- [Kitty Murphy](https://github.com/KittyMurphy)
- [Mykhaylo Malakhov](https://github.com/MykMal)
- [Alasdair Warwick](https://github.com/rmgpanw)
- [Ao Lu](https://github.com/leoarrow1)

# References

Expand Down
27 changes: 27 additions & 0 deletions longtests/testthat/test-liftover.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,33 @@ test_that("liftover works", {
# drop 1 row from org not in chain files
ref_37_org <- ref_37_org[SNP %in% ref_37$SNP, ]
expect_equal(all.equal(ref_37_org, ref_37), TRUE)

#test local chain file
save_dir <- tempdir()
lcl_chain <- system.file("extdata",'GRCh37_to_GRCh38.chain.gz',
package="MungeSumstats")
copied <- R.utils::copyFile(
srcPathname = lcl_chain,
overwrite=TRUE,
save_dir)
reformatted4 <- MungeSumstats::format_sumstats(
path = file,
ref_genome = "GRCh37",
on_ref_genome = TRUE,
convert_ref_genome = "GRCh38",
strand_ambig_filter = FALSE,
bi_allelic_filter = FALSE,
allele_flip_check = FALSE,
log_folder_ind = TRUE,
imputation_ind = TRUE,
dbSNP=144,
local_chain = paste0(save_dir,'/GRCh37_to_GRCh38.chain.gz')
)
lcl_ss <- data.table::fread(reformatted$sumstats)
remote_ss <- data.table::fread(reformatted4$sumstats)
expect_equal(all.equal(lcl_ss,remote_ss))


} else {
expect_equal((is_32bit_windows||!Sys.info()["sysname"]=="Linux"), TRUE)
}
Expand Down
7 changes: 7 additions & 0 deletions man/format_sumstats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/import_sumstats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/liftover.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/validate_parameters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 16afb54

Please sign in to comment.