Skip to content

Commit

Permalink
v1.2.8
Browse files Browse the repository at this point in the history
Bug fixes
- Removed duplicated entries in theoretical precursor peptides (which also improves search speeds, e.g., in phosphopeptide workflows).
- Removed subsequently duplicated entries in PSM entries.
- Removed duplicated PSMs with workflows of multiple offsets in precursor masses.
- Included `0` as part of the offsets in precursor masses.
  • Loading branch information
qzhang503 committed Jun 17, 2023
1 parent eaccf2c commit 2be455b
Show file tree
Hide file tree
Showing 32 changed files with 490 additions and 490 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mzion
Type: Package
Title: Database Searches of Proteomic Mass-spectrometric Data
Version: 1.2.7
Version: 1.2.8
Authors@R:
person(given = "Qiang",
family = "Zhang",
Expand Down
73 changes: 23 additions & 50 deletions R/bin_masses.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
#' @param res The results from \link{calc_pepmasses2}.
#' @param min_mass A minimum mass of precursors.
#' @param max_mass A maximum mass of precursors.
#' @param sys_ram A putative value of system RAM.
#' @inheritParams matchMS
#' @inheritParams load_mgfs
#' @inheritParams calc_pepmasses2
bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L,
min_len = 7L, max_len = 40L, ppm_ms1 = 20L,
use_ms1_cache = TRUE, .path_cache = NULL,
.path_ms1masses = NULL, is_ms1_three_frame = TRUE,
out_path = NULL, enzyme = "trypsin_p",
sys_ram = 24L)
out_path = NULL, enzyme = "trypsin_p")
{
old_opts <- options()
options(warn = 1L)
Expand Down Expand Up @@ -84,37 +82,8 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L,
idxes <- idxes[order(idxes)]
})

if (FALSE) {
n_cores <- local({
fct <- 20
free_mem <- find_free_mem(sys_ram)
max_sz <- max(file.size(file.path(.path_mass, masses)))/1024^2

n_cores <- min(floor(free_mem/max_sz/fct), detect_cores(8L))

if (n_cores < 1L) {
warning("May be out of RAM with large peptide tables.")
n_cores <- 1L
}

n_cores
})
}
n_cores <- set_bin_ncores(len_m, enzyme)

n_cores <- local({
n_cores <- detect_cores(15L)

if (len_m > n_cores)
n_cores <- min(floor(n_cores/2L), len_m)
else
n_cores <- min(n_cores, len_m)

if (enzyme == "noenzyme")
n_cores <- floor(n_cores/2L)

n_cores <- max(1L, n_cores)
})

if (n_cores > 1L) {
cl <- parallel::makeCluster(getOption("cl.cores", n_cores))

Expand Down Expand Up @@ -311,23 +280,8 @@ binTheoSeqs <- function (idxes = NULL, res = NULL, min_mass = 200L,
res <- lapply(res, `[[`, "data")
gc()

n_cores <- local({
len <- length(res)
n_cores <- detect_cores(15L)

if (len > n_cores)
n_cores <- min(floor(n_cores/2L), len)
else
n_cores <- min(n_cores, len)

if (enzyme == "noenzyme")
n_cores <- floor(n_cores/2L)

n_cores <- max(1L, n_cores)
})

n_cores <- set_bin_ncores(length(res), enzyme)
cl <- parallel::makeCluster(getOption("cl.cores", n_cores))

parallel::clusterExport(cl, list("qread", "qsave"),
envir = environment(qs::qsave))
parallel::clusterExport(cl, c("bin_theoseqs", "find_ms1_cutpoints"),
Expand All @@ -340,7 +294,6 @@ binTheoSeqs <- function (idxes = NULL, res = NULL, min_mass = 200L,
ppm_ms1 = ppm_ms1),
SIMPLIFY = FALSE, USE.NAMES = FALSE,
.scheduling = "dynamic")

parallel::stopCluster(cl)
rm(list = c("res"))
gc()
Expand Down Expand Up @@ -394,3 +347,23 @@ s_readRDS <- function (file, out_path)
}


#' Chooses the number of CPU cores used for binning precursor masses.
#'
#' Starts from the value returned by \code{detect_cores(15L)}. When there are
#' more modules than that value, half of it (rounded down) is used; the result
#' is then capped at \code{len_m}. The count is halved once more (rounded
#' down) for \code{enzyme == "noenzyme"}. The returned value is never below 1.
#'
#' @param len_m The number of \code{aa_masses} modules.
#' @param enzyme The assumed enzymatic activity. Only the value
#'   \code{"noenzyme"} changes the outcome.
#' @return A single number (at least 1) giving the count of cores to use.
set_bin_ncores <- function (len_m, enzyme)
{
  # NOTE(review): 15L is presumably an upper limit on detected cores -- confirm
  # against detect_cores().
  avail <- detect_cores(15L)

  # More modules than cores: start from half of the available cores.
  cap <- if (len_m > avail) floor(avail/2L) else avail

  # Never request more workers than there are modules.
  ans <- min(cap, len_m)

  if (enzyme == "noenzyme")
    ans <- floor(ans/2L)

  # The halvings above can reach zero; always return at least one core.
  max(1L, ans)
}


15 changes: 3 additions & 12 deletions R/fastas.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,24 @@
#' head(names(fasta))
#' }
#'
#' @import dplyr purrr
#' @importFrom magrittr %>% %T>% %$% %<>%
#' @seealso \code{\link{write_fasta}}
#' @export
read_fasta <- function (file = NULL, acc_pattern = ">([^ ]+?) .*",
comment_char = "")
{
lines <- readLines(file)

# removes empty lines
lines <- readLines(file)
empties <- grep("^\\s*$", lines)

if (length(empties))
lines <- lines[-empties]

rm(list = c("empties"))


# removes comment lines
if (nchar(comment_char))
lines <- lines[!grepl(paste0("^", comment_char), lines)]

# begins and ends
headers <- grep(">", lines)
begins <- headers + 1L
begins <- headers + 1L
ends <- c(headers[-1L] - 1L, length(lines))

seqs <- mapply(function (x, y) {
Expand Down Expand Up @@ -88,9 +82,6 @@ read_fasta <- function (file = NULL, acc_pattern = ">([^ ]+?) .*",
#' fasta_db <- read_fasta(file = "~/mzion/dbs/fasta/uniprot/uniprot_hs_2020_05.fasta")
#' write_fasta(fasta_db, "~/mzion/examples/my.fasta")
#' }
#'
#' @import dplyr purrr
#' @importFrom magrittr %>% %T>% %$% %<>%
write_fasta <- function (fasta_db, file)
{
filepath <- gsub("(^.*/).*$", "\\1", file)
Expand Down
Loading

0 comments on commit 2be455b

Please sign in to comment.