Skip to content

Commit

Permalink
Catch for overflow when NA's in SNP for check_no_rs_snp()
Browse files Browse the repository at this point in the history
  • Loading branch information
Al-Murphy committed Feb 9, 2024
1 parent 33e8daa commit 009dfd8
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 19 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: MungeSumstats
Type: Package
Title: Standardise summary statistics from GWAS
Version: 1.11.5
Version: 1.11.6
Authors@R:
c(person(given = "Alan",
family = "Murphy",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ importFrom(data.table,fwrite)
importFrom(data.table,key)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(data.table,setDF)
importFrom(data.table,setDT)
importFrom(data.table,setcolorder)
importFrom(data.table,setkey)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## CHANGES IN VERSION 1.11.6

### Bug fix
* Catch for overflow when NA's in SNP col for `check_no_rs_snp()` check with
`imputation_ind=TRUE`.

## CHANGES IN VERSION 1.11.4

### Bug fix
Expand Down
24 changes: 18 additions & 6 deletions R/check_no_rs_snp.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ check_no_rs_snp <- function(sumstats_dt, path, ref_genome, snp_ids_are_rs_ids,
check_save_out,tabix_index, nThread, log_files,
dbSNP) {
SNP <- CHR <- CHR1 <- BP1 <- i.RefSNP_id <- IMPUTATION_SNP <-
SNP_old_temp <- SNP_INFO <- A1 <- A2 <- NULL
SNP_old_temp <- SNP_INFO <- A1 <- A2 <- .I <- NULL
# if snp ids aren't rs ids rename the column to ID's
# so RSIDs can be inferred
if ((!snp_ids_are_rs_ids) & sum("SNP" %in% names(sumstats_dt)) == 1) {
Expand Down Expand Up @@ -56,6 +56,10 @@ check_no_rs_snp <- function(sumstats_dt, path, ref_genome, snp_ids_are_rs_ids,
message("Checking SNP RSIDs.")
# needed for later to join and match SNPs
if (imputation_ind) {
# if any are NA, we need to give them a unique ID
# need to be able to identify to revert back so picked up
#by other, NA for RS ID check
sumstats_dt[is.na(SNP)|SNP=="", SNP := paste0("NA_",.I)]
sumstats_dt[, SNP_old_temp := SNP]
}
miss_rs <- sumstats_dt[!grep("^rs", SNP), ]
Expand Down Expand Up @@ -174,8 +178,8 @@ check_no_rs_snp <- function(sumstats_dt, path, ref_genome, snp_ids_are_rs_ids,
format <- c("BP1", "CHR1")
}
miss_rs_chr_bp[, (format) := data.table::tstrsplit(SNP,
split = ":", fixed = TRUE
)]#[c(1,2)]]
split = ":",
fixed = TRUE)]
# if BP col has other info after, drop it
if (sum(grepl("[[:punct:]].*", miss_rs_chr_bp$BP1)) > 0) {
miss_rs_chr_bp[, BP1 := gsub("([[:punct:]]).*", "", BP1)]
Expand Down Expand Up @@ -278,7 +282,8 @@ check_no_rs_snp <- function(sumstats_dt, path, ref_genome, snp_ids_are_rs_ids,
}
}
if (nrow(miss_rs) != nrow(sumstats_dt) && nrow(miss_rs) != 0) {
if (nrow(miss_rs_chr_bp) == 0) {
#Prev if (nrow(miss_rs_chr_bp) == 0) {
if (nrow(miss_rs_chr_bp) < nrow(miss_rs)) {
# don't filter twice if hit prev condition
# check if impute of correct SNP ID possible
if (sum(c("CHR", "BP") %in% col_headers) == 2 &&
Expand Down Expand Up @@ -346,11 +351,18 @@ check_no_rs_snp <- function(sumstats_dt, path, ref_genome, snp_ids_are_rs_ids,
# If user wants log, save it to there
if (log_folder_ind &&
nrow(sumstats_dt[!grep("^rs", SNP), ]) > 0) {
#if imputation ind on, NA's will be replaced with NA_i
#change back here
tmp_rmv <- sumstats_dt[!grepl("^rs", SNP), ]
if(imputation_ind){
tmp_rmv[, SNP_old_temp := NULL]
tmp_rmv[grepl("^NA_",SNP), SNP := NA]
}
name <- "snp_missing_rs"
name <- get_unique_name_log_file(name = name,
log_files = log_files)
write_sumstats(
sumstats_dt = sumstats_dt[!grepl("^rs", SNP), ],
sumstats_dt = tmp_rmv,
save_path =
paste0(
check_save_out$log_folder,
Expand Down Expand Up @@ -420,7 +432,7 @@ check_no_rs_snp <- function(sumstats_dt, path, ref_genome, snp_ids_are_rs_ids,
setkey(sumstats_dt, SNP_old_temp)
sumstats_dt[miss_rs, IMPUTATION_SNP := TRUE]
}
# remove temp column either way
# remove temp columns either way
sumstats_dt[, SNP_old_temp := NULL]
}
return(list("sumstats_dt" = sumstats_dt, "log_files" = log_files))
Expand Down
14 changes: 13 additions & 1 deletion R/format_sumstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,19 @@ format_sumstats <- function(path,
nThread = nThread
)
}


#If user inputted mapping file, validate
if(!identical(mapping_file, sumstatsColHeaders)) {
message("Non-standard mapping file detected.",
"Making sure all entries in `Uncorrected`",
" are in upper case.")
data.table::setDF(mapping_file)
#check again
if(!identical(mapping_file, sumstatsColHeaders)) {
mapping_file$Uncorrected <- toupper(mapping_file$Uncorrected)
}
}

#If es_is_beta remove from mapping file if present
if (!es_is_beta & nrow(mapping_file[mapping_file$Uncorrected=="ES" &
mapping_file$Corrected=="BETA",])>=1)
Expand Down
11 changes: 0 additions & 11 deletions R/standardise_sumstats_column_headers_crossplatform.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,6 @@ standardise_header <- standardise_sumstats_column_headers_crossplatform <-
mapping_file = sumstatsColHeaders,
uppercase_unmapped=TRUE,
return_list=TRUE) {

data.table::setDF(mapping_file)
if(!all.equal(
mapping_file,
sumstatsColHeaders
)) {
message(
"Non-standard mapping file detected.",
"Making sure all entries in `Uncorrected` are in upper case.")
mapping_file$Uncorrected <- toupper(mapping_file$Uncorrected)
}
message("Standardising column headers.")
message("First line of summary statistics file: ")
msg <- paste0(names(sumstats_dt), split = "\t")
Expand Down

0 comments on commit 009dfd8

Please sign in to comment.