From eaccf2c900260caf715618add8c43c483ffa9fcc Mon Sep 17 00:00:00 2001 From: Qiang Zhang <45829450+qzhang503@users.noreply.github.com> Date: Wed, 14 Jun 2023 19:37:20 -0500 Subject: [PATCH] v1.2.7 Updates: - codes optimization - notch searches (off-sets in precursor masses) - bug fixes --- DESCRIPTION | 2 +- R/bin_masses.R | 137 ++++--- R/funs.R | 6 +- R/ion_ladder.R | 242 +++-------- R/mgfs.R | 85 ++-- R/ms1_precursors.R | 110 +++-- R/ms2_gen.R | 98 ++--- R/ms2frames.R | 479 +++++++++++----------- R/msmsmatches.R | 319 +++++++------- R/msmsmatches2.R | 71 ++-- R/quant2.R | 112 ++--- R/scores.R | 154 +++---- R/utils_ui.R | 39 +- data-raw/ms2match_modules.R | 36 ++ man/add_ms1_13c.Rd | 13 +- man/add_ms1_notches.Rd | 20 + man/add_seions.Rd | 5 +- man/binTheoSeqs.Rd | 3 + man/bin_ms1masses.Rd | 13 + man/bions_base.Rd | 88 ++-- man/calc_aamasses.Rd | 5 + man/calc_monopep.Rd | 5 +- man/calc_monopeptide.Rd | 5 +- man/calc_ms2ions.Rd | 5 +- man/calc_ms2ions_a1_vnl0_fnl0.Rd | 6 +- man/calc_ms2ions_a1_vnl0_fnl1.Rd | 6 +- man/calc_ms2ions_a1_vnl1_fnl0.Rd | 6 +- man/calc_ms2ionseries.Rd | 5 +- man/calc_pepmasses2.Rd | 14 +- man/calc_pepprobs_i.Rd | 10 +- man/calc_pepscores.Rd | 18 +- man/calc_probi.Rd | 10 +- man/calc_probi_bypep.Rd | 10 +- man/calc_probi_byvmods.Rd | 10 +- man/calc_protfdr.Rd | 14 +- man/calc_protfdr_i.Rd | 14 +- man/calc_tmtint.Rd | 21 +- man/calcpepsc.Rd | 18 +- man/calib_mgf.Rd | 19 +- man/calib_ms1.Rd | 6 +- man/find_aa_masses.Rd | 5 + man/find_ms1_interval.Rd | 2 +- man/find_ms1_offsets.Rd | 19 + man/find_ms2_bypep.Rd | 1 - man/find_reporter_ints.Rd | 12 +- man/find_reporters_ppm.Rd | 4 +- man/frames_adv.Rd | 12 +- man/fuzzy_match_one.Rd | 25 -- man/fuzzy_match_one2.Rd | 57 --- man/gen_ms2ions_base.Rd | 18 +- man/hadd_primatches.Rd | 8 +- man/hcalc_tmtint.Rd | 17 +- man/hms2match.Rd | 14 +- man/hms2match_one.Rd | 130 ++++++ man/hpair_mgths.Rd | 48 +++ man/matchMS.Rd | 66 +-- man/matchMS_par_groups.Rd | 2 +- man/mframes_adv.Rd | 6 +- 
man/ms2ions_by_type.Rd | 5 +- man/ms2match.Rd | 26 +- man/ms2match_all.Rd | 6 +- man/ms2match_one.Rd | 9 +- man/{pair_mgftheo.Rd => pair_mgftheos.Rd} | 24 +- man/scalc_pepprobs.Rd | 10 +- man/search_mgf.Rd | 8 +- man/unique_mvmods.Rd | 6 +- man/vmods_elements.Rd | 6 +- 67 files changed, 1347 insertions(+), 1438 deletions(-) create mode 100644 man/add_ms1_notches.Rd create mode 100644 man/find_ms1_offsets.Rd delete mode 100644 man/fuzzy_match_one.Rd delete mode 100644 man/fuzzy_match_one2.Rd create mode 100644 man/hms2match_one.Rd create mode 100644 man/hpair_mgths.Rd rename man/{pair_mgftheo.Rd => pair_mgftheos.Rd} (81%) diff --git a/DESCRIPTION b/DESCRIPTION index af54545..6210d3f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mzion Type: Package Title: Database Searches of Proteomic Mass-spectrometrirc Data -Version: 1.2.6.3 +Version: 1.2.7 Authors@R: person(given = "Qiang", family = "Zhang", diff --git a/R/bin_masses.R b/R/bin_masses.R index 2831c38..bcdcf79 100644 --- a/R/bin_masses.R +++ b/R/bin_masses.R @@ -10,10 +10,11 @@ #' @inheritParams load_mgfs #' @inheritParams calc_pepmasses2 bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, - ppm_ms1 = 20L, use_ms1_cache = TRUE, - .path_cache = NULL, .path_ms1masses = NULL, - is_ms1_three_frame = TRUE, - out_path = NULL, sys_ram = 24L) + min_len = 7L, max_len = 40L, ppm_ms1 = 20L, + use_ms1_cache = TRUE, .path_cache = NULL, + .path_ms1masses = NULL, is_ms1_three_frame = TRUE, + out_path = NULL, enzyme = "trypsin_p", + sys_ram = 24L) { old_opts <- options() options(warn = 1L) @@ -38,11 +39,10 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, # checks pre-existed precursor masses .time_stamp <- get(".time_stamp", envir = .GlobalEnv, inherits = FALSE) - .path_mass <- file.path(.path_ms1masses, .time_stamp) + .path_mass <- file.path(.path_ms1masses, .time_stamp) masses <- list.files(path = .path_mass, pattern = paste0("^pepmasses_", "\\d+\\.rds$")) - 
len_m <- length(masses) - if (!len_m) + if (!(len_m <- length(masses))) stop("File not found: ", file.path(.path_mass, paste0("pepmasses_", "[...].rds"))) @@ -51,52 +51,32 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, "calc_pepmasses2", .time_stamp), fun = fun, - nms = c("min_mass", "max_mass", "ppm_ms1")) + nms = c("min_mass", "max_mass", "min_len", + "max_len", "ppm_ms1")) # already binned - len_bts <- length(.time_bin) - - if (len_bts > 1L) + if ((len_bts <- length(.time_bin)) > 1L) stop("More than one cached results found: \n\n", paste(file.path(.path_ms1masses, .time_stamp, fun), collapse = "\n"), - "\n\nDelete the caches and start over.", - call. = FALSE) + "\n\nDelete the caches and start over.") if (len_bts && use_ms1_cache) { .path_bin <- file.path(.path_ms1masses, .time_stamp, fun, .time_bin) - bins <- list.files(path = .path_bin, pattern = "binned_theopeps_\\d+\\.rds$") - len_b <- length(bins) - if (len_b == len_m) { + if (length(bins) == len_m) { message("Loading bins of MS1 masses from cache.") - .savecall <- FALSE - - # no need of global `.time_bin` - assign(".path_bin", .path_bin, envir = .GlobalEnv) - - return(NULL) + return(.path_bin) } } - - # to be binned message("Binning MS1 masses...") .time_bin <- format(Sys.time(), ".%Y-%m-%d_%H%M%S") .path_bin <- create_dir(file.path(.path_ms1masses, .time_stamp, fun, .time_bin)) - - if (!is.null(res)) { - # (a) process directly - binTheoSeqs(idxes = NULL, - res = res, - min_mass = min_mass, - max_mass = max_mass, - ppm_ms1 = ppm_ms1_bin, - out_path = file.path(.path_bin, "binned_theopeps.rds")) - } - else { + + if (is.null(res)) { # (b) reload idxes <- local({ idxes <- gsub("^pepmasses_(\\d+)\\.rds$", "\\1", masses) @@ -104,19 +84,35 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, idxes <- idxes[order(idxes)] }) + if (FALSE) { + n_cores <- local({ + fct <- 20 + free_mem <- find_free_mem(sys_ram) + max_sz <- max(file.size(file.path(.path_mass, 
masses)))/1024^2 + + n_cores <- min(floor(free_mem/max_sz/fct), detect_cores(8L)) + + if (n_cores < 1L) { + warning("May be out of RAM with large peptide tables.") + n_cores <- 1L + } + + n_cores + }) + } + n_cores <- local({ - fct <- 20 - free_mem <- find_free_mem(sys_ram) - max_sz <- max(file.size(file.path(.path_mass, masses)))/1024^2 + n_cores <- detect_cores(15L) - n_cores <- min(floor(free_mem/max_sz/fct), detect_cores(8L)) - - if (n_cores < 1L) { - warning("May be out of RAM with large peptide tables.") - n_cores <- 1L - } + if (len_m > n_cores) + n_cores <- min(floor(n_cores/2L), len_m) + else + n_cores <- min(n_cores, len_m) - n_cores + if (enzyme == "noenzyme") + n_cores <- floor(n_cores/2L) + + n_cores <- max(1L, n_cores) }) if (n_cores > 1L) { @@ -129,6 +125,7 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, c("binTheoSeqs_i", "binTheoSeqs2", "bin_theoseqs", + "s_readRDS", "find_ms1_cutpoints"), envir = environment(mzion::matchMS)) @@ -147,23 +144,35 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, parallel::stopCluster(cl) } - else { + else lapply(idxes, binTheoSeqs_i, min_mass, max_mass, ppm_ms1_bin, .path_mass, .path_bin) - } } + else { + # (a) process directly + binTheoSeqs(idxes = NULL, + res = res, + min_mass = min_mass, + max_mass = max_mass, + ppm_ms1 = ppm_ms1_bin, + enzyme = enzyme, + out_path = file.path(.path_bin, "binned_theopeps.rds")) + } + + pat_b <- "^binned_theopeps_[0-9]+\\.rds" + len_b <- length(list.files(.path_bin, pattern = pat_b)) + + if (len_b != len_m) + stop("May need more RAM: expect ", len_m, " \"", pat_b, "\" files, ", + "but found ", len_b, " files under \n\"", .path_bin, "\"\n") .savecall <- TRUE - assign(".time_bin", .time_bin, envir = .GlobalEnv) - assign(".path_bin", .path_bin, envir = .GlobalEnv) - local({ file <- file.path(out_path, "Calls", ".cache_info.rds") - if (file.exists(file)) { + if (file.exists(file)) .cache_info <- qs::qread(file) - } 
.cache_info$.time_bin <- .time_bin .cache_info$.path_bin <- .path_bin @@ -173,7 +182,7 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, message("Completed precusor bins at: ", Sys.time()) - invisible(NULL) + invisible(.path_bin) } @@ -181,7 +190,7 @@ bin_ms1masses <- function (res = NULL, min_mass = 200L, max_mass = 4500L, #' #' @param in_path An input path of \code{pepmasses_}. #' @inheritParams binTheoSeqs2 -binTheoSeqs_i <- function (idx = 1L, min_mass = 200L,max_mass = 4500L, +binTheoSeqs_i <- function (idx = 1L, min_mass = 200L, max_mass = 4500L, ppm_ms1 = 10L, in_path = NULL, out_path = NULL) { if (is.null(in_path)) @@ -189,8 +198,7 @@ binTheoSeqs_i <- function (idx = 1L, min_mass = 200L,max_mass = 4500L, message("\tSet: ", idx) - in_nm <- paste0("pepmasses_", idx, ".rds") - res <- s_readRDS(in_nm, in_path) + res <- s_readRDS(paste0("pepmasses_", idx, ".rds"), in_path) binTheoSeqs2(idx = idx, res = res, @@ -241,7 +249,6 @@ bin_theoseqs <- function (peps = NULL, out_nm = NULL, min_mass = 200L, if (!length(peps)) { out <- NULL qs::qsave(out, out_nm, preset = "fast") - return(NULL) } @@ -255,7 +262,7 @@ bin_theoseqs <- function (peps = NULL, out_nm = NULL, min_mass = 200L, out <- dplyr::arrange(out, frame, pep_seq) out <- split(out, out$frame, drop = FALSE) - + out <- lapply(out, function (x) { x[["frame"]] <- NULL; x }) qs::qsave(out, out_nm, preset = "fast") invisible(NULL) @@ -272,6 +279,7 @@ bin_theoseqs <- function (peps = NULL, out_nm = NULL, min_mass = 200L, #' @param max_mass Numeric; the maximum MS1 mass. #' @param ppm_ms1 Numeric; (half of) the error tolerance of MS1 mass in ppm. #' @param out_path The output path. +#' @param enzyme The assume enzyme activity. #' @examples #' \donttest{ #' library(mzion) @@ -281,9 +289,9 @@ bin_theoseqs <- function (peps = NULL, out_nm = NULL, min_mass = 200L, #' } #' @return Lists of theoretical peptides binned by MS1 masses. The lists #' correspond to the lists of \code{res}. 
-#' @import parallel binTheoSeqs <- function (idxes = NULL, res = NULL, min_mass = 200L, - max_mass = 4500L, ppm_ms1 = 10L, out_path = NULL) + max_mass = 4500L, ppm_ms1 = 10L, enzyme = "trypsin_p", + out_path = NULL) { if (is.null(res)) stop("`res` cannot be NULL.") @@ -305,20 +313,23 @@ binTheoSeqs <- function (idxes = NULL, res = NULL, min_mass = 200L, n_cores <- local({ len <- length(res) - n_cores <- detect_cores(16L) + n_cores <- detect_cores(15L) if (len > n_cores) n_cores <- min(floor(n_cores/2L), len) else n_cores <- min(n_cores, len) + + if (enzyme == "noenzyme") + n_cores <- floor(n_cores/2L) n_cores <- max(1L, n_cores) }) cl <- parallel::makeCluster(getOption("cl.cores", n_cores)) - parallel::clusterExport(cl, list("qread", "qsave"), envir = environment(qs::qsave)) - + parallel::clusterExport(cl, list("qread", "qsave"), + envir = environment(qs::qsave)) parallel::clusterExport(cl, c("bin_theoseqs", "find_ms1_cutpoints"), envir = environment(mzion::matchMS)) diff --git a/R/funs.R b/R/funs.R index 9fd9af0..6025f40 100644 --- a/R/funs.R +++ b/R/funs.R @@ -16,10 +16,8 @@ # # $ion_ladder.R # [1] "ms2ions_by_type" "byions" "czions" "axions" "bions_base" "yions_base" -# [7] "b2ions_base" "bstarions" "bstar2ions" "b0ions" "b02ions" "y2ions" -# [13] "ystarions" "ystar2ions" "y0ions" "y02ions" "cions_base" "c2ions" -# [19] "zions_base" "z2ions" "aions_base" "a2ions" "astarions" "astar2ions" -# [25] "a0ions" "a02ions" "xions_base" "x2ions" +# [7] "cions_base" "zions_base" "c2ions" "z2ions" "aions_base" "xions_base" +# [13] "a2ions" "astarions" "astar2ions" "a0ions" "a02ions" "x2ions" # # $mapMS2ions.R # [1] "mapMS2ions" "match_mgf_path" "match_raw_id" "add_raw_ids" diff --git a/R/ion_ladder.R b/R/ion_ladder.R index a1d6c63..26c8577 100644 --- a/R/ion_ladder.R +++ b/R/ion_ladder.R @@ -6,37 +6,44 @@ #' @param ntmass The mass of a fixed or variable N-term modification. #' @param ctmass The mass of a fixed or variable C-term modification. 
#' @inheritParams matchMS -ms2ions_by_type <- function (aam, ntmass, ctmass, type_ms2ions = "by", - digits = 4L) +ms2ions_by_type <- function (aam, ntmass, ctmass, type_ms2ions = "by") { switch(type_ms2ions, - by = byions(ntmass, ctmass, aam, digits), - cz = czions(ntmod, ctmod, aam, digits), - ax = axions(ntmod, ctmod, aam, digits), + by = byions(ntmass = ntmass, ctmass = ctmass, aam = aam), + cz = czions(ntmass = ntmass, ctmass = ctmass, aam = aam), + ax = axions(ntmass = ntmass, ctmass = ctmass, aam = aam), stop("Unknown type.", call. = FALSE)) } #' Masses of singly-charged b- and y-ions. #' -#' @inheritParams ms2ions_by_type +#' b-ions first, then y-ions +#' #' @rdname bions_base -byions <- function (ntmass, ctmass, aam, digits = 4L) - c(bions_base(aam, ntmass, digits), yions_base(aam, ctmass, digits)) +byions <- function (ntmass, ctmass, aam) + c(cumsum(c(ntmass, aam))[-1], cumsum(c(ctmass, aam[length(aam):1L]))[-1]) #' Masses of singly-charged c- and z-ions. #' #' @rdname bions_base -czions <- function (ntmass, ctmass, aam, digits = 4L) - c(cions_base(aam, ntmass, digits), zions_base(aam, ctmass, digits)) +czions <- function (ntmass, ctmass, aam) + c(cumsum(c(ntmass + 17.026549, aam))[-1], + cumsum(c(ctmass - 17.026549, aam[length(aam):1L]))[-1]) + #' Masses of singly-charged a- and x-ions. #' #' @rdname bions_base -axions <- function (ntmass, ctmass, aam, digits = 4L) - c(aions_base(aam, ntmass, digits), xions_base(aam, ctmass, digits)) +axions <- function (ntmass, ctmass, aam) + c(cumsum(c(ntmass - 27.9949146, aam))[-1], + cumsum(c(ctmass + 25.9792646, aam[length(aam):1L]))[-1]) + +### +# No direct uses of the followings. +### #' B-ions. #' @@ -48,8 +55,7 @@ axions <- function (ntmass, ctmass, aam, digits = 4L) #' The masses reflects fixed/variable modifications, and/or fixed/variable #' neutral losses. #' -#' @param digits Integer; the number of decimal places to be used. -#' @param tmass The mass of a fixed or variable N-term or C-term modification. 
+#' @param ntmass The mass of a fixed or variable N-term modification. #' #' @importFrom stringr str_split #' @examples @@ -152,249 +158,107 @@ axions <- function (ntmass, ctmass, aam, digits = 4L) #' #' b <- mzion:::bions_base(aam, ntmass) #' y <- mzion:::yions_base(aam, ctmass) -#' #' } -bions_base <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +bions_base <- function (aam, ntmass) cumsum(c(ntmass, aam))[-1] #' Y-ions. #' -#' @rdname bions_base -yions_base <- function (aam, tmass, digits = 4L) -{ - # (1) OH (C-term), + H (neutralizes the N-term on a fragment) + H+ - # (2) Other C-term (other than OH) + H + H+: X + 1.007825 + 1.00727647 - ions <- c(tmass, aam[length(aam):1L]) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} - - -#' B2-ions. -#' -#' @param n The charge state. -#' @rdname bions_base -b2ions_base <- function (aam, tmass, digits = 4L, n = 2L) - (bions_base(aam, tmass, digits) + 1.00727647)/n - - -#' B*-ions. -#' -#' @rdname bions_base -bstarions <- function (aam, tmass, digits = 4L) -{ - # -NH3:17.026549 - ions <- c(tmass - 17.026549, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} - - -#' B*2-ions. -#' -#' @param n The charge state. -#' @rdname bions_base -bstar2ions <- function (aam, tmass, digits = 4L, n = 2L) - (bstarions(aam, tmass, digits) + 1.00727647)/n - - -#' B0-ions. -#' -#' \code{H2O = 18.010565}. +#' # (1) OH (C-term), + H (neutralizes the N-term on a fragment) + H+ +#' # (2) Other C-term (other than OH) + H + H+: X + 1.007825 + 1.00727647 #' +#' @param ctmass The mass of a fixed or variable C-term modification. 
#' @rdname bions_base -b0ions <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass - 18.010565, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +yions_base <- function (aam, ctmass) cumsum(c(ctmass, aam[length(aam):1L]))[-1] -#' B02-ions. +#' C-ions. #' -#' @param n The charge state. -#' @rdname bions_base -b02ions <- function (aam, tmass, digits = 4L, n = 2L) - (b0ions(aam, tmass, digits) + 1.00727647)/n - - -#' Y2-ions. +#' \code{NH3 = 17.026549} #' -#' @param n The charge state. #' @rdname bions_base -y2ions <- function (aam, tmass, digits = 4L, n = 2L) - (yions_base(aam, tmass, digits) + 1.00727647)/n +cions_base <- function (aam, ntmass) cumsum(c(ntmass + 17.026549, aam))[-1] -#' Y*-ions. +#' Z-ions. #' #' @rdname bions_base -ystarions <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass - 17.026549, aam[length(aam):1L]) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +zions_base <- function (aam, ctmass) + cumsum(c(ctmass - 17.026549, aam[length(aam):1L]))[-1] -#' Y*2-ions. +#' C2-ions. #' #' @param n The charge state. #' @rdname bions_base -ystar2ions <- function (aam, tmass, digits = 4L, n = 2L) - (ystarions(aam, tmass, digits) + 1.00727647)/n - - -#' Y0-ions. -#' -#' @rdname bions_base -y0ions <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass - 18.010565, aam[length(aam):1L]) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +c2ions <- function (aam, ntmass, n = 2L) (cions_base(aam, ntmass) + 1.00727647)/n -#' Y02-ions. +#' Z2-ions. #' #' @param n The charge state. #' @rdname bions_base -y02ions <- function (aam, tmass, digits = 4L, n = 2L) - (y0ions(aam, tmass, digits) + 1.00727647)/n - - -#' C-ions. 
-#' -#' @rdname bions_base -cions_base <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass + 17.026549, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +z2ions <- function (aam, ctmass, n = 2L) (zions_base(aam, ctmass) + 1.00727647)/n -#' C2-ions. +#' A-ions. #' -#' @param n The charge state. -#' @rdname bions_base -c2ions <- function (aam, tmass, digits = 4L, n = 2L) - (cions_base(aam, tmass, digits) + 1.00727647)/n - +#' \code{CO = 27.9949146} -#' Z-ions. -#' #' @rdname bions_base -zions_base <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass - 17.026549, aam[length(aam):1L]) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +aions_base <- function (aam, ntmass) cumsum(c(ntmass - 27.9949146, aam))[-1] -#' Z2-ions. +#' X-ions. #' -#' @param n The charge state. -#' @rdname bions_base -z2ions <- function (aam, tmass, digits = 4L, n = 2L) - (zions_base(aam, tmass, digits) + 1.00727647)/n - - -#' A-ions. +#' \code{+CO -H2 = 27.9949146 - 2 * 1.007825} #' #' @rdname bions_base -aions_base <- function (aam, tmass, digits = 4L) -{ - ions <- c(tmass - 27.9949146, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +xions_base <- function (aam, ctmass) + cumsum(c(ctmass + 25.9792646, aam[length(aam):1L]))[-1] #' A2-ions. #' #' @param n The charge state. #' @rdname bions_base -a2ions <- function (aam, tmass, digits = 4L, n = 2L) - (aions_base(aam, tmass, digits) + 1.00727647)/n +a2ions <- function (aam, ntmass, n = 2L) (aions_base(aam, ntmass) + 1.00727647)/n #' A*-ions. #' +#' \code{-CO -NH3 = -(27.9949146 + 17.026549)} +#' #' @rdname bions_base -astarions <- function (aam, tmass, digits = 4L) -{ - # -CO -NH3 = -(27.9949146 + 17.026549) - ions <- c(tmass - 45.0214636, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +astarions <- function (aam, ntmass) cumsum(c(ntmass - 45.0214636, aam))[-1] #' A*2-ions. 
#' #' @param n The charge state. #' @rdname bions_base -astar2ions <- function (aam, tmass, digits = 4L, n = 2L) - (astarions(aam, tmass, digits) + 1.00727647)/n +astar2ions <- function (aam, ntmass, n = 2L) (astarions(aam, ntmass) + 1.00727647)/n #' A0-ions. #' +#' \code{-CO -H2O = -(27.9949146 + 18.010565)} +#' #' @rdname bions_base -a0ions <- function (aam, tmass, digits = 4L) -{ - # -CO -H2O = -(27.9949146 + 18.010565) - ions <- c(tmass - 46.0054796, aam) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +a0ions <- function (aam, ntmass) cumsum(c(ntmass - 46.0054796, aam))[-1] #' A02-ions. #' #' @param n The charge state. #' @rdname bions_base -a02ions <- function (aam, tmass, digits = 4L, n = 2L) - (a0ions(aam, tmass, digits) + 1.00727647)/n - - -#' X-ions. -#' -#' @rdname bions_base -xions_base <- function (aam, tmass, digits = 4L) -{ - # +CO -H2 = 27.9949146 - 2*1.007825 - ions <- c(tmass + 25.9792646, aam[length(aam):1L]) - ions <- cumsum(ions) - ions <- ions[-1] - # round(ions, digits = digits) -} +a02ions <- function (aam, ntmass, n = 2L) (a0ions(aam, ntmass) + 1.00727647)/n #' X2-ions. #' #' @param n The charge state. #' @rdname bions_base -x2ions <- function (aam, tmass, digits = 4L, n = 2L) - (xions_base(aam, tmass, digits) + 1.00727647)/n +x2ions <- function (aam, ctmass, n = 2L) (xions_base(aam, ctmass) + 1.00727647)/n + diff --git a/R/mgfs.R b/R/mgfs.R index 7283333..dd65fec 100644 --- a/R/mgfs.R +++ b/R/mgfs.R @@ -138,8 +138,7 @@ load_mgfs <- function (out_path, mgf_path, min_mass = 200L, max_mass = 4500L, digits = digits) } else if (len_mzml) { - warning("Please uncheck \"Use zlib compression\" with mzML from MSConvert.", - call. 
= FALSE) + warning("Please uncheck \"Use zlib compression\" with mzML from MSConvert.") readmzML(filepath = mgf_path, filelist = filelist, @@ -245,16 +244,15 @@ readMGF <- function (filepath = NULL, filelist = NULL, mgf_format <- "Bruker-DataAnalysis" } - ans <- list(data_format = data_format, mgf_format = mgf_format) - qs::qsave(ans, file.path(filepath, "info_format.rds"), preset = "fast") + qs::qsave(list(data_format = data_format, mgf_format = mgf_format), + file.path(filepath, "info_format.rds"), preset = "fast") }) ## Reads MGF into chunks # separate parallel process: # (1) one large MGF file and parallel chunks # (2) parallel five MGF files and parallel chunks in each - len <- length(filelist) - n_cores <- min(len, detect_cores(32L)) + n_cores <- min(len <- length(filelist), detect_cores(32L)) if (n_cores == 1L) raw_files <- readlineMGFs(1, filelist, filepath, raw_file) @@ -342,8 +340,7 @@ post_readmgf <- function (df, min_mass = 200L, max_mass = 4500L, ppm_ms1 = 10L, { df <- dplyr::arrange(df, ms1_mass) # df <- dplyr::filter(df, ms1_mass >= min_mass, ms1_mass <= max_mass) - df <- dplyr::mutate(df, frame = find_ms1_interval(ms1_mass, from = min_mass, ppm = ppm_ms1)) - + raws_files <- df$raw_file raws <- raws_files[!duplicated.default(raws_files)] inds <- seq_along(raws) @@ -364,7 +361,6 @@ post_readmgf <- function (df, min_mass = 200L, max_mass = 4500L, ppm_ms1 = 10L, qs::qsave(df[[i]], file.path(filepath, paste0("mgf_queries_", nms[i], ".rds")), preset = "fast") } - invisible(NULL) } @@ -463,9 +459,8 @@ read_mgf_chunks <- function (filepath = "~/mzion/mgf/temp_1", quant = "none", digits = 4L) { filelist <- list.files(path = file.path(filepath), pattern = "^.*\\.mgf$") - len <- length(filelist) - if (!len) + if (!(len <- length(filelist))) stop("No mgf files under ", filepath, call. 
= FALSE) n_cores <- min(detect_cores(32L), len) @@ -527,7 +522,6 @@ read_mgf_chunks <- function (filepath = "~/mzion/mgf/temp_1", digits = digits) parallel::stopCluster(cl) - # gc() out <- dplyr::bind_rows(out) @@ -728,7 +722,7 @@ proc_mgfs <- function (lines, topn_ms2ions = 100L, options(digits = 9L) begins <- .Internal(which(stringi::stri_startswith_fixed(lines, "BEGIN IONS"))) - ends <- .Internal(which(stringi::stri_endswith_fixed(lines, "END IONS"))) + ends <- .Internal(which(stringi::stri_endswith_fixed(lines, "END IONS"))) ## MS1 # (1) m-over-z and intensity @@ -740,12 +734,10 @@ proc_mgfs <- function (lines, topn_ms2ions = 100L, ms1_moverzs <- lapply(ms1s, function (x) as.numeric(x[, 1])) ms1_moverzs <- .Internal(unlist(ms1_moverzs, recursive = FALSE, use.names = FALSE)) # not as.integer; intensity may be > .Machine$integer.max (2147483647) - ms1_ints <- lapply(ms1s, function (x) round(as.numeric(x[, 2]), digits = 1L)) + ms1_ints <- lapply(ms1s, function (x) as.numeric(x[, 2])) ms1_ints <- .Internal(unlist(ms1_ints, recursive = FALSE, use.names = FALSE)) - rm(list = c("ms1s")) - gc() - + # (2) retention time ret_times <- stringi::stri_replace_first_fixed(lines[begins + n_to_rt], "RTINSECONDS=", "") ret_times <- as.numeric(ret_times) @@ -758,10 +750,7 @@ proc_mgfs <- function (lines, topn_ms2ions = 100L, charges <- as.integer(charges) ms1_masses <- mapply(function (x, y) x * y - y * 1.00727647, - ms1_moverzs, charges, - SIMPLIFY = TRUE, USE.NAMES = FALSE) - ms1_masses <- round(ms1_masses, digits = digits) - ms1_moverzs <- round(ms1_moverzs, digits = digits) + ms1_moverzs, charges, SIMPLIFY = TRUE, USE.NAMES = FALSE) rows <- (charges >= ms1_charge_range[1] & charges <= ms1_charge_range[2] & ret_times >= ret_range[1] & ret_times <= ret_range[2] & @@ -770,8 +759,8 @@ proc_mgfs <- function (lines, topn_ms2ions = 100L, !is.na(ms1_masses)) # timsTOF data may have undetermined charge states - na_rows <- .Internal(which(is.na(rows))) - if (length(na_rows)) 
rows[na_rows] <- FALSE + if (length(na_rows <- .Internal(which(is.na(rows))))) + rows[na_rows] <- FALSE begins <- begins[rows] ends <- ends[rows] @@ -802,11 +791,11 @@ proc_mgfs <- function (lines, topn_ms2ions = 100L, } else if (type_mgf == "default_pasef") { # one raw_file one .d file guaranteed - raw_files <- rep(raw_file, length(begins)) + raw_files <- rep_len(raw_file, length.out = length(begins)) scan_nums <- stringi::stri_replace_first_fixed(lines[begins + n_to_scan], "RAWSCANS=", "") } else { - stop("Unknown MGF format.", call. = FALSE) + stop("Unknown MGF format.") } ## MS2 @@ -876,8 +865,7 @@ proc_mgfs <- function (lines, topn_ms2ions = 100L, ms2_n = lens, # charge = charges, rptr_moverz = rptr_moverzs, - rptr_int = rptr_ints, - ) + rptr_int = rptr_ints) } @@ -903,9 +891,7 @@ sub_mgftopn <- function (ms2_moverzs, ms2_ints, topn_ms2ions = 100L, ms2_ints <- mapply(function (x, y) x[y], ms2_ints, oks, SIMPLIFY = FALSE, USE.NAMES = FALSE) - - rm(list = c("oks")) - + ## subsets by topn lens <- lapply(ms2_moverzs, length) lens <- .Internal(unlist(lens, recursive = FALSE, use.names = FALSE)) @@ -959,7 +945,7 @@ sub_mgftopn <- function (ms2_moverzs, ms2_ints, topn_ms2ions = 100L, } ## - rows <- mapply(which_topx2, ys, ok_percs, SIMPLIFY = FALSE, USE.NAMES = FALSE) + rows <- mapply(which_topx2, ys, ok_percs, SIMPLIFY = FALSE, USE.NAMES = FALSE) ans_x <- mapply(function (x, y) x[y], xs, rows, SIMPLIFY = FALSE, USE.NAMES = FALSE) ans_y <- mapply(function (x, y) x[y], ys, rows, SIMPLIFY = FALSE, USE.NAMES = FALSE) @@ -971,8 +957,6 @@ sub_mgftopn <- function (ms2_moverzs, ms2_ints, topn_ms2ions = 100L, if (i %% 5000L == 0) gc() } - - rm(list = c("m_long", "i_long")) } else { rows <- lapply(ms2_ints[is_long], which_topx2, topn_ms2ions) @@ -984,11 +968,7 @@ sub_mgftopn <- function (ms2_moverzs, ms2_ints, topn_ms2ions = 100L, ms2_moverzs[is_long] <- mapply(function (x, y) x[y], ms2_moverzs[is_long], rows, SIMPLIFY = FALSE, USE.NAMES = FALSE) - - rm(list = c("rows")) } - 
- rm(list = c("is_long")) } # also handles MS2 intensity max-outs, which usually don't happen @@ -1073,7 +1053,7 @@ extract_mgf_rptrs <- function (ms2_moverzs, ms2_ints, quant = "none", #' } #' @return Frame numbers. #' @seealso find_ms1_cutpoints -find_ms1_interval <- function (mass = 1800.0, from = 115L, ppm = 10L) +find_ms1_interval <- function (mass = 1800.0, from = 200L, ppm = 10L) { ceiling(log(unlist(mass, recursive = FALSE, use.names = FALSE)/from)/log(1+ppm/1e6)) } @@ -1106,7 +1086,6 @@ find_mgf_type <- function (file) b2 <- len_h + 1L hdr[b2] <- "BEGIN IONS" begins <- c(begins, b2) - rm(list = "b2") } # if (!length(ends)) @@ -1428,14 +1407,12 @@ proc_mzml <- function (file, topn_ms2ions = 100L, ms1_charge_range = c(2L, 6L), charges <- .Internal(unlist(charges, recursive = FALSE, use.names = FALSE)) charges <- as.integer(charges) df <- dplyr::mutate(df, charge = charges) - rm(list = c("charges", "ms1_charges")) } df <- dplyr::filter(df, charge >= ms1_charge_range[1], charge <= ms1_charge_range[2], ret_time >= ret_range[1], ret_time <= ret_range[2], ms1_mass >= min_mass, ms1_mass <= max_mass, ) - df[["charge"]] <- NULL # subsets by top-n and min_ms2mass @@ -1488,8 +1465,7 @@ read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper scan_nums[i] <- gsub(".* scan=(.*)$", "\\1", xml2::xml_attr(x, "id")) xc <- xml2::xml_children(x) idx_precursor <- grep("precursorList", xc) - rm(list = c("x")) - + if (length(idx_precursor)) { nms <- xml2::xml_attr(xc, "name") idx_title <- .Internal(which(nms == "spectrum title")) @@ -1508,8 +1484,7 @@ read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper idx_scan_start <- .Internal(which(xml2::xml_attr(scanList_scan, "name") == "scan start time")) ret_times[i] <- xml2::xml_attr(scanList_scan[[idx_scan_start]], "value") - rm(list = c("nms", "title", "scanList_scan", "scanList")) - + ## precursorList precursorList <- xml2::xml_children(xc[[idx_precursor]]) @@ -1528,10 +1503,7 @@ 
read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper xml2::xml_attr(selectedIonc[[3]], "value") else numeric(1) - - rm(list = c("precursor", "precursorc", "idx_selectedIonList", - "selectedIon", "selectedIonc")) - + ## binaryDataArrayList binData <- xml2::xml_children(xml2::xml_children(xc[[idx_bin]])) ms2s <- xml2::xml_contents(binData) @@ -1541,7 +1513,6 @@ read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper ms2_moverzs[[i]] <- readBin(r1, "double", n = ms2_n, size = 8L) ms2_ints[[i]] <- readBin(r2, "double", n = ms2_n, size = 8L) ms2_ns[i] <- ms2_n - rm(list = c("r1", "r2", "ms2s", "binData", "ms2_n")) } } @@ -1560,11 +1531,14 @@ read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper charges <- as.integer(ms1_charges) ms1_charges <- paste0(ms1_charges, "+") # assume always "+" for now - ms1_moverzs <- round(as.numeric(ms1_moverzs), digits = digits) - ms1_masses <- round(ms1_moverzs * charges - charges * 1.00727647, digits = digits) + # ms1_moverzs <- round(as.numeric(ms1_moverzs), digits = digits) + # ms1_masses <- round(ms1_moverzs * charges - charges * 1.00727647, digits = digits) + ms1_moverzs <- as.numeric(ms1_moverzs) + ms1_masses <- ms1_moverzs * charges - charges * 1.00727647 + # ms1_ints not "as.integer": may be > .Machine$integer.max (2147483647) - ms1_ints <- round(as.numeric(ms1_ints), digits = 1L) - ret_times <- round(as.numeric(ret_times) * 60, digits = 2L) + ms1_ints <- as.numeric(ms1_ints) + ret_times <- as.numeric(ret_times) * 60 # extract the TMT region of MS2 moverz and intensity # (also convert reporter-ion intensities to integers) @@ -1579,7 +1553,6 @@ read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper ms2_ints <- restmt[["ms2_ints"]] rptr_moverzs <- restmt[["rptr_moverzs"]] rptr_ints <- restmt[["rptr_ints"]] - rm(list = "restmt") ms2_moverzs <- if (index_mgf_ms2) lapply(ms2_moverzs, index_mz, min_ms2mass, ppm_ms2/1E6) @@ -1605,7 
+1578,7 @@ read_mzml <- function (xml_file, tmt_reporter_lower = 126.1, tmt_reporter_upper charge = charges, rptr_moverz = rptr_moverzs, - rptr_int = rptr_ints, ) + rptr_int = rptr_ints) } diff --git a/R/ms1_precursors.R b/R/ms1_precursors.R index 1f3af90..3dea5ba 100644 --- a/R/ms1_precursors.R +++ b/R/ms1_precursors.R @@ -58,6 +58,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + rm_dup_term_anywhere = TRUE, fixedlabs = NULL, varlabs = NULL, mod_motifs = NULL, @@ -70,7 +71,6 @@ calc_pepmasses2 <- function (aa_masses = NULL, maxn_sites_per_vmod = 3L, min_len = 7L, max_len = 40L, max_miss = 2L, min_mass = 200L, max_mass = 4500L, - n_13c = 0L, out_path = NULL, digits = 4L, use_ms1_cache = TRUE, @@ -109,8 +109,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, # (7) Twelve types of VARIABLE modifications/masses: # adds variable terminal masses (tmod+), variable anywhere masses (vmods+) # neutral losses (fnl+) etc. - - + old_opts <- options() options(warn = 1L) on.exit(options(old_opts), add = TRUE) @@ -143,6 +142,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, }) # toggle this if no new arguments to bypass + # new_args <- c(ms1_notches = 0) new_args <- NULL .time_stamp <- match_calltime( @@ -157,7 +157,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, "maxn_fasta_seqs", "maxn_vmods_setscombi", "maxn_vmods_per_pep", "maxn_sites_per_vmod", "min_len", "max_len", "max_miss", - "min_mass", "max_mass", "n_13c"), + "min_mass", "max_mass"), # exception: new arguments need matches but not defined in earlier versions new_args = new_args) @@ -176,6 +176,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, out_path = file.path(.path_fasta, "ms1masses", .time_stamp), fixedmods = fixedmods, varmods = varmods, + rm_dup_term_anywhere = rm_dup_term_anywhere, varlabs = varlabs, mod_motifs = mod_motifs, maxn_vmods_setscombi = maxn_vmods_setscombi) @@ -190,9 +191,6 @@ calc_pepmasses2 <- function (aa_masses = 
NULL, file.path(.path_cache, fun, paste0(.time_stamp, ".rda")), " and try again.") - rm(list = c("aa_masses_all", "files")) - gc() - fwd_peps <- NULL rev_peps <- NULL @@ -214,6 +212,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, out_path = path_tstamp, fixedmods = fixedmods, varmods = varmods, + rm_dup_term_anywhere = rm_dup_term_anywhere, varlabs = varlabs, mod_motifs = mod_motifs, maxn_vmods_setscombi = maxn_vmods_setscombi) @@ -271,7 +270,6 @@ calc_pepmasses2 <- function (aa_masses = NULL, max_len = max_len, aa_masses = aa_masses_0, ftmass = ftmass) - gc() } else { @@ -332,8 +330,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, # `length(fwd_peps) == length(aa_masses_all)` after the step. # Note 1-to-n expansion: # `length(fwd_peps) == length(aa_masses_all)` after the step. - # n_cores = 4L at noenzyme and 16L otherwise - n_cores <- detect_cores(16L) + n_cores <- detect_cores(15L) if (isTRUE(enzyme == "noenzyme")) n_cores <- floor(max(1L, n_cores/4L)) @@ -383,8 +380,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, "ct_counts", "rm_char_in_nfirst", "rm_char_in_nlast"), - envir = environment(mzion::matchMS) - ) + envir = environment(mzion::matchMS)) # aa_masses_all[[1]] is for the original all-fixed mode not for the coerced, # otherwise, e.g. 
fixed to variable coercion of "TMT (K)" with a conflicting @@ -398,9 +394,8 @@ calc_pepmasses2 <- function (aa_masses = NULL, motifs_all = motifs_all, max_miss = max_miss, max_len = max_len, # different purpose - enzyme = enzyme - ) - + enzyme = enzyme) + parallel::stopCluster(cl) fwd_peps <- lapply(seq_along(aa_masses_all), function (i) { @@ -413,7 +408,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, message("\tCompleted bare peptides distributions.") rm(list = c("seqs_0")) - gc() + # gc() # (c) Protein-peptide associations message("Summarizing the association of proteins and peptides.") @@ -431,17 +426,17 @@ calc_pepmasses2 <- function (aa_masses = NULL, simple_prots_peps) parallel::stopCluster(cl) - gc() + # gc() prps <- flatten_list(prps) qs::qsave(prps, file.path(path_prp, "simple_prot_pep.rds"), preset = "fast") rm(list = c("prps")) - gc() + # gc() # (d) Flattened peptide lists (prot_acc's removed) fwd_peps <- lapply(fwd_peps, flat_pepseqs) - gc() + # gc() # (e) Adjusted base masses if with fixed-to-variable coercion @@ -451,8 +446,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, fwd_peps[[1]] <- adj_base_masses(fwd_peps[[1]], aa_masses_0, aa_masses_1, min_mass = min_mass, max_mass = max_mass, digits = digits) - - gc() + # gc() # --- Delta masses of `variable` terminals --- @@ -505,7 +499,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, # (5, 6) "amods- tmod+ vnl- fnl+", "amods- tmod- vnl- fnl+" if (FALSE) { - n_cores <- detect_cores(32L) + n_cores <- detect_cores(32L) - 1L inds <- which(types %in% c("amods- tmod- vnl- fnl+", "amods- tmod+ vnl- fnl+")) @@ -580,7 +574,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, # 32L: 22.6 # 64L: 39 - n_cores <- detect_cores(16L) + n_cores <- detect_cores(15L) inds <- which(types %in% c("amods+ tmod- vnl- fnl-", "amods+ tmod+ vnl- fnl-", @@ -649,36 +643,31 @@ calc_pepmasses2 <- function (aa_masses = NULL, } parallel::stopCluster(cl) - gc() + # gc() } suppressWarnings( rm(list = c("amods_i", "fmods_nl", 
"fmods_ps", "fwd_peps_i", - "vmods_nl_i", "aa_masses_1", "aa_masses_i")) - ) - - gc() - - fwd_peps <- lapply(fwd_peps, add_ms1_13c, n_13c) + "vmods_nl_i", "aa_masses_1", "aa_masses_i"))) + # gc() + # fwd_peps <- lapply(fwd_peps, add_ms1_13c, n_13c, max_mass) + # fwd_peps <- lapply(fwd_peps, add_ms1_notches, ms1_notches, max_mass) # === Outputs === path_masses <- create_dir(file.path(.path_ms1masses, .time_stamp)) - fwd_peps <- purrr::map2(aa_masses_all, fwd_peps, ~ { - attr(.x, "data") <- .y - .x - }) + fwd_peps <- mapply(function (x, y) { + attr(x, "data") <- y + x + }, aa_masses_all, fwd_peps, SIMPLIFY = FALSE, USE.NAMES = FALSE) names(fwd_peps) <- seq_along(aa_masses_all) - gc() - for (i in seq_along(fwd_peps)) { + for (i in seq_along(fwd_peps)) qs::qsave(fwd_peps[[i]], file.path(path_masses, paste0("pepmasses_", i, ".rds")), preset = "fast") - } - gc() .savecall <- TRUE message("\n=== Completed MS1 precursor masses. ===\n") @@ -709,7 +698,8 @@ calc_pepmasses2 <- function (aa_masses = NULL, #' #' @inheritParams calc_pepmasses2 find_aa_masses <- function(aa_masses = NULL, out_path = NULL, fixedmods = NULL, - varmods = NULL, varlabs = NULL, mod_motifs = NULL, + varmods = NULL, rm_dup_term_anywhere = TRUE, + varlabs = NULL, mod_motifs = NULL, maxn_vmods_setscombi = 64L) { file <- file.path(out_path, "aa_masses_all.rds") @@ -723,6 +713,7 @@ find_aa_masses <- function(aa_masses = NULL, out_path = NULL, fixedmods = NULL, aa_masses_all <- calc_aamasses(fixedmods = fixedmods, varmods = varmods, + rm_dup_term_anywhere = rm_dup_term_anywhere, aa_masses = aa_masses, varlabs = varlabs, mod_motifs = mod_motifs, @@ -946,6 +937,7 @@ calc_aamasses <- function (fixedmods = c("TMT6plex (K)", "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + rm_dup_term_anywhere = TRUE, aa_masses = NULL, varlabs = NULL, mod_motifs = NULL, maxn_vmods_setscombi = 64L, out_path = NULL) { @@ -1037,7 +1029,7 @@ calc_aamasses <- function (fixedmods = c("TMT6plex (K)", ## (3) add 
variable mods + NL varmods_comb <- find_aamasses_vmodscombi(varmods, f_to_v, anywhere_coerce_sites) - if (rm_dup_term_anywhere <- TRUE) { + if (rm_dup_term_anywhere) { oks <- unlist(lapply(varmods_comb, check_dup_term_any)) varmods_comb <- varmods_comb[oks] } @@ -2752,11 +2744,13 @@ ms1masses_bare <- function (seqs = NULL, aa_masses = NULL, ftmass = NULL, #' Adds Carbon-13 masses. -#' +#' +#' Not currently used. +#' #' @param peps A named vector of peptide sequences. Sequences in names and #' masses in values. #' @inheritParams matchMS -add_ms1_13c <- function (peps, n_13c = 1L, max_mass = 4500L) +add_ms1_13c <- function (peps, n_13c = 0L, max_mass = 4500L) { len <- length(n_13c) @@ -2765,7 +2759,7 @@ add_ms1_13c <- function (peps, n_13c = 1L, max_mass = 4500L) if (len == 1L && n_13c == 0L) return(peps) - + mass_13c <- 1.00335483 ns <- if (len == 1L) if (n_13c < 0L) n_13c:0L else 0:n_13c else n_13c len2 <- length(ns) @@ -2782,6 +2776,36 @@ add_ms1_13c <- function (peps, n_13c = 1L, max_mass = 4500L) } +#' Adds offsets of MS1 masses. +#' +#' Not currently used. +#' +#' @param peps A named vector of peptide sequences. Sequences in names and +#' masses in values. +#' @param masses The masses of notches. +#' @inheritParams matchMS +add_ms1_notches <- function (peps, masses = 0, max_mass = 4500L) +{ + if (!length(masses)) + return(peps) + + masses <- c(0, masses[abs(masses) > 1e-4]) + + if ((len <- length(masses)) == 1L) + return(peps) + + out <- vector("list", len) + out[[1]] <- peps + + for (i in 2:len) + out[[i]] <- peps + masses[[i]] + + out <- .Internal(unlist(out, recursive = FALSE, use.names = TRUE)) + + out[out <= max_mass] +} + + #' Helper of \link{ms1masses_bare}. #' #' For either forward or reversed sequences. 
diff --git a/R/ms2_gen.R b/R/ms2_gen.R index 20d1418..70aa9b1 100644 --- a/R/ms2_gen.R +++ b/R/ms2_gen.R @@ -103,8 +103,7 @@ gen_ms2ions_base <- function (aa_seq = NULL, ms1_mass = NULL, # dummy maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L) + maxn_vmods_sitescombi_per_pep = 64L) { aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) @@ -112,7 +111,7 @@ gen_ms2ions_base <- function (aa_seq = NULL, ms1_mass = NULL, naa <- length(aas) nm <- .Internal(paste0(list(rep_len("0", naa)), collapse = "", recycle0 = FALSE)) - af <- ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions, digits) + af <- ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions) av <- list(calc_rev_ms2(af, aas)) names(av) <- NA_character_ @@ -186,9 +185,7 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, # dummy maxn_vnl_per_seq = 3L, - - maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L) + maxn_vmods_sitescombi_per_pep = 64L) { if (maxn_fnl_per_seq < 2L) return( @@ -202,8 +199,7 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits)) + maxn_vmods_sitescombi_per_pep)) # (1, 2) "amods- tmod+ vnl- fnl-", "amods- tmod- vnl- fnl-" # (no pep_seq dispatching by Anywhere fmod residues -> possible no matched sites) @@ -227,8 +223,7 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits)) + maxn_vmods_sitescombi_per_pep)) # (5, 6) "amods- tmod+ vnl- fnl+", "amods- tmod- vnl- fnl+" aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) @@ -267,7 +262,7 @@ 
gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, av <- af <- vector("list", len) aam <- aa_masses[aas] - af[[1]] <- af1 <- ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions, digits) + af[[1]] <- af1 <- ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions) av[[1]] <- av1 <- calc_rev_ms2(af1, aas) if (len > 1L) { @@ -277,7 +272,7 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, for (i in 2:len) { fnl_combi_i <- ans[[i]] aami[idxes] <- aamii - fnl_combi_i - af[[i]] <- afi <- ms2ions_by_type(aami, ntmass, ctmass, type_ms2ions, digits) + af[[i]] <- afi <- ms2ions_by_type(aami, ntmass, ctmass, type_ms2ions) av[[i]] <- calc_rev_ms2(afi, aas) } } @@ -487,8 +482,7 @@ gen_ms2ions_a1_vnl0_fnl0 <- function (aa_seq, ms1_mass = NULL, aa_masses = NULL, # dummy maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L) + maxn_vmods_sitescombi_per_pep = 64L) { aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) @@ -520,7 +514,7 @@ gen_ms2ions_a1_vnl0_fnl0 <- function (aa_seq, ms1_mass = NULL, aa_masses = NULL, af <- calc_ms2ions_a1_vnl0_fnl0( M = M, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) } else { P <- find_vmodposM(M = ms2vmods, aas = aas, nmax = maxn_vmods_sitescombi_per_pep) @@ -529,7 +523,7 @@ gen_ms2ions_a1_vnl0_fnl0 <- function (aa_seq, ms1_mass = NULL, aa_masses = NULL, af <- lapply(split_matrix(M, by = "row"), calc_ms2ions_a1_vnl0_fnl0, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) af <- .Internal(unlist(af, recursive = FALSE, use.names = TRUE)) } @@ -547,8 +541,7 @@ gen_ms2ions_a1_vnl0_fnl0 <- function (aa_seq, ms1_mass = NULL, aa_masses = 
NULL, #' @inheritParams ms2ions_by_type #' @inheritParams add_var_masses calc_ms2ions_a1_vnl0_fnl0 <- function (M, P, aam, aa_masses, ntmass, ctmass, - type_ms2ions = "by", mod_indexes, - digits = 4L) + type_ms2ions = "by", mod_indexes) { ds <- aa_masses[M] nvm <- nrow(P) @@ -561,7 +554,7 @@ calc_ms2ions_a1_vnl0_fnl0 <- function (M, P, aam, aa_masses, ntmass, ctmass, vi <- P[i, ] aam_i <- aam aam_i[vi] <- aam_i[vi] + ds - out[[i]] <- ms2ions_by_type(aam_i, ntmass, ctmass, type_ms2ions, digits) + out[[i]] <- ms2ions_by_type(aam_i, ntmass, ctmass, type_ms2ions) h <- hx0 h[vi] <- mod_indexes[M] @@ -608,18 +601,23 @@ check_ms1_mass_vmods <- function (ms2vmods, aam, aa_masses, ntmod, ctmod, else if (ok_c) aa_masses[names(ctmod)] - bd <- bare + delta - - len <- length(ms2vmods) - ans <- vector("logical", len) + bd <- bare + delta + ans <- vector("logical", (len <- length(ms2vmods))) for (i in 1:len) { vi <- ms2vmods[[i]] - ans[i] <- if (length(vi)) - (abs(bd + sum(aa_masses[vi]) - ms1_mass) <= tol) + if (length(vi)) { + mi <- bd + sum(aa_masses[vi]) + ans[i] <- abs(mi - ms1_mass) <= tol + + # ans[i] <- if (length(ms1_offsets) == 1L && ms1_offsets == 0) + # abs(mi - ms1_mass) <= tol + # else + # any(abs(mi - ms1_mass + ms1_offsets) <= tol) + } else - FALSE + ans[i] <- FALSE } ans @@ -774,8 +772,7 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, maxn_fnl_per_seq = 3L, # dummy - maxn_vnl_per_seq = 3L, - digits = 4L) + maxn_vnl_per_seq = 3L) { if (maxn_fnl_per_seq < 2L) return( @@ -789,8 +786,7 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits)) + maxn_vmods_sitescombi_per_pep)) # (7, 8) "amods+ tmod- vnl- fnl-", "amods+ tmod+ vnl- fnl-" # (no pep_seq dispatching by fmod residues -> possible no matched sites) @@ -813,8 +809,7 @@ gen_ms2ions_a1_vnl0_fnl1 <- function 
(aa_seq = NULL, ms1_mass = NULL, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits)) + maxn_vmods_sitescombi_per_pep)) # (11, 12) "amods+ tmod- vnl- fnl+", "amods+ tmod+ vnl- fnl+" aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) @@ -863,8 +858,7 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, af <- calc_ms2ions_a1_vnl0_fnl1( M = M, P = P, fnl_combi = fnl_combi, fnl_idxes = fnl_idxes, aam = aam, aa_masses = aa_masses, ntmass = ntmass, - ctmass = ctmass, type_ms2ions = type_ms2ions, mod_indexes = mod_indexes, - digits = digits) + ctmass = ctmass, type_ms2ions = type_ms2ions, mod_indexes = mod_indexes) } else { P <- find_vmodposM(M = ms2vmods, aas = aas, nmax = maxn_vmods_sitescombi_per_pep) @@ -874,7 +868,7 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, P = P, fnl_combi = fnl_combi, fnl_idxes = fnl_idxes, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) af <- .Internal(unlist(af, recursive = FALSE, use.names = TRUE)) } @@ -895,8 +889,7 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, #' @inheritParams hms1_a0_vnl0_fnl1 calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, fnl_idxes, aam, aa_masses, ntmass, ctmass, - type_ms2ions = "by", mod_indexes, - digits = 4L) + type_ms2ions = "by", mod_indexes) { ds <- aa_masses[M] nvm <- nrow(P) @@ -914,7 +907,7 @@ calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, fnl_idxes, aam_i[vi] <- aam_i[vi] + ds # the first fnl are all 0's - out[[r]] <- ms2ions_by_type(aam_i, ntmass, ctmass, type_ms2ions, digits) + out[[r]] <- ms2ions_by_type(aam_i, ntmass, ctmass, type_ms2ions) r <- r + 1L if (nnl > 1L) { @@ -922,7 +915,7 @@ calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, 
fnl_idxes, aam_j <- aam_i delta_nl <- .Internal(unlist(fnl_combi[[j]], recursive = FALSE, use.names = FALSE)) aam_j[fnl_idxes] <- aam_j[fnl_idxes] - delta_nl - out[[r]] <- ms2ions_by_type(aam_j, ntmass, ctmass, type_ms2ions, digits) + out[[r]] <- ms2ions_by_type(aam_j, ntmass, ctmass, type_ms2ions) r <- r + 1L } } @@ -1156,8 +1149,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, # dummy maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 3L, - digits = 4L) + maxn_vnl_per_seq = 3L) { if (maxn_vnl_per_seq < 2L) return(gen_ms2ions_a1_vnl0_fnl0(aa_seq = aa_seq, ms1_mass = ms1_mass, @@ -1170,8 +1162,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits)) + maxn_vmods_sitescombi_per_pep)) aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) @@ -1203,7 +1194,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, af <- calc_ms2ions_a1_vnl0_fnl0( M = M, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) else { nnl <- min(maxn_vmods_sitescombi_per_pep %/% nP, maxn_vnl_per_seq) @@ -1211,7 +1202,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, af <- calc_ms2ions_a1_vnl0_fnl0( M = M, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) else af <- calc_ms2ions_a1_vnl1_fnl0( N = expand_grid_rows(vmods_nl[ms2vmods], nmax = nnl, use.names = FALSE), @@ -1222,8 +1213,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, ntmass = ntmass, ctmass = ctmass, type_ms2ions = 
type_ms2ions, - mod_indexes = mod_indexes, - digits = digits) + mod_indexes = mod_indexes) } } else { @@ -1241,13 +1231,13 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, af <- calc_ms2ions_a1_vnl0_fnl0( M = M, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) } else { af <- lapply(split_matrix(M, by = "row"), calc_ms2ions_a1_vnl0_fnl0, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) af <- .Internal(unlist(af, recursive = FALSE, use.names = TRUE)) } } @@ -1259,7 +1249,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, af <- lapply(M, calc_ms2ions_a1_vnl0_fnl0, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, digits = digits) + mod_indexes = mod_indexes) af <- .Internal(unlist(af, recursive = FALSE, use.names = TRUE)) } else { @@ -1282,8 +1272,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, - mod_indexes = mod_indexes, - digits = digits + mod_indexes = mod_indexes ), SIMPLIFY = FALSE, USE.NAMES = FALSE) @@ -1315,8 +1304,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, #' @inheritParams matchMS calc_ms2ions_a1_vnl1_fnl0 <- function (N, M, P, aam, aa_masses, ntmass, ctmass, - type_ms2ions = "by", mod_indexes, - digits = 4L) + type_ms2ions = "by", mod_indexes) { ds <- aa_masses[M] nnl <- length(N) @@ -1336,7 +1324,7 @@ calc_ms2ions_a1_vnl1_fnl0 <- function (N, M, P, aam, aa_masses, aam_j <- aam_i delta_nl <- .Internal(unlist(N[[j]], recursive = FALSE, use.names = FALSE)) aam_j[vi] <- aam_j[vi] - delta_nl - out[[r]] <- ms2ions_by_type(aam_j, ntmass, ctmass, type_ms2ions, 
digits) + out[[r]] <- ms2ions_by_type(aam_j, ntmass, ctmass, type_ms2ions) r <- r + 1L } diff --git a/R/ms2frames.R b/R/ms2frames.R index ce55bab..4a44468 100644 --- a/R/ms2frames.R +++ b/R/ms2frames.R @@ -2,6 +2,8 @@ #' #' @param mgf_path The path to MGF files #' @param n_modules The number of modules (\code{length(aa_masses_all)}) or one +#' @param ms1_offsets Off-sets in precursor masses (in relative to the values in +#' the original MGFs). #' @param .path_bin The path to binned theoretical masses #' @param ppm_ms1_bin The tolerance in precursor mass error after mass binning. #' @param by_modules Logical; if TRUE, results are saved with one mgf to one @@ -9,9 +11,10 @@ #' modules #' @inheritParams ms2match #' @inheritParams matchMS -pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, - reframe_mgfs = FALSE, min_mass = 200L, - ppm_ms1_bin = 10L, first_search = FALSE) +pair_mgftheos <- function (mgf_path, n_modules, ms1_offsets = 0, + by_modules = TRUE, min_mass = 200L, max_mass = 4500L, + ppm_ms1_bin = 10L, .path_bin, reframe_mgfs = FALSE, + first_search = FALSE) { message("Pairing experimental and theoretical data.") @@ -22,15 +25,14 @@ pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, if (length(tempfiles)) unlink(tempfiles) - - # MGFs (in data frame) split by frame indexes + mgf_files <- list.files(mgf_path, pattern = "^mgf_queries_\\d+\\.rds$", full.names = TRUE) - mgf_frames <- lapply(mgf_files, qs::qread) - - # for MGF calibrations + mgfs <- lapply(mgf_files, qs::qread) + + # data thinning for MGF calibrations if (first_search) { - mgf_frames <- lapply(mgf_frames, function (x) { + mgfs <- lapply(mgfs, function (x) { min_mgfmass <- min(x$ms1_mass, na.rm = TRUE) max_mgfmass <- max(x$ms1_mass, na.rm = TRUE) oks_min <- with(x, ms1_mass <= min_mgfmass + 10L) @@ -44,21 +46,45 @@ pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, }) } - mgf_frames <- dplyr::bind_rows(mgf_frames) - - if 
(reframe_mgfs) { - mgf_frames[["frame"]] <- - find_ms1_interval(mgf_frames[["ms1_mass"]], from = min_mass, - ppm = ppm_ms1_bin) - } + ## output expttheo_1_1.rds: expttheo_module[_notch].rds + mgfs <- dplyr::bind_rows(mgfs) + + mapply(hpair_mgths, ms1_offsets, seq_along(ms1_offsets), + MoreArgs = list( + mgfs = mgfs, n_modules = n_modules, by_modules = by_modules, + mgf_path = mgf_path, min_mass = min_mass, max_mass = max_mass, + ppm_ms1_bin = ppm_ms1_bin, .path_bin = .path_bin), + SIMPLIFY = FALSE, USE.NAMES = FALSE) + + invisible(NULL) +} + - mgf_frames <- dplyr::group_by(mgf_frames, frame) - mgf_frames <- dplyr::group_split(mgf_frames) - fr_names <- lapply(mgf_frames, function (x) x[["frame"]][1]) - names(mgf_frames) <- unlist(fr_names, recursive = FALSE, use.names = FALSE) +#' Helper of \link{pair_mgftheos}. +#' +#' @param ms1_offset The ms1 offset. +#' @param notch The index assigned to an ms1_offset. +#' @param mgfs MGF data. +#' @inheritParams pair_mgftheos +hpair_mgths <- function (ms1_offset = 0, notch = NULL, mgfs, n_modules, + by_modules = TRUE, mgf_path, min_mass = 200L, + max_mass = 4500L, ppm_ms1_bin = 10L, .path_bin) +{ + if (abs(ms1_offset) > 1e-4) { + mgfs <- if (ms1_offset > 0) + mgfs[with(mgfs, ms1_mass <= max_mass - ms1_offset), ] + else + mgfs[with(mgfs, ms1_mass >= min_mass - ms1_offset), ] + + mgfs[["ms1_mass"]] <- mgfs[["ms1_mass"]] - ms1_offset + } - # -> chunks: each chunk has multiple frames: each frame multiple precursors - ranges <- seq_along(mgf_frames) + mgfs[["pep_ms1_offset"]] <- ms1_offset + mgfs <- split(mgfs, find_ms1_interval(mgfs[["ms1_mass"]], from = min_mass, + ppm = ppm_ms1_bin)) + + # to chunks: each chunk has multiple frames: each frame multiple precursors + ranges <- seq_along(mgfs) n_chunks <- if (n_modules == 1L || by_modules) min(detect_cores(96L)^2, 1024L) @@ -67,15 +93,15 @@ pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, else min(length(ranges), length(mgf_files) * n_modules) - labs <- 
levels(cut(ranges, n_chunks)) - lower <- floor(as.numeric( sub("\\((.+),.*", "\\1", labs))) - grps <- findInterval(ranges, lower) - mgf_frames <- split(mgf_frames, grps) - rm(list = c("fr_names", "ranges", "labs", "lower", "grps")) - - # (1) splits `theos` in accordance to `mgf_frames` with - # preceding and following frames: (o)|range of mgf_frames[[1]]|(o) - mfrs <- lapply(mgf_frames, function (x) as.integer(names(x))) + labs <- levels(cut(ranges, n_chunks)) + lwrs <- floor(as.numeric( sub("\\((.+),.*", "\\1", labs))) + grps <- findInterval(ranges, lwrs) + mgfs <- split(mgfs, grps) + rm(list = c("ranges", "labs", "lwrs", "grps")) + + # (1) splits `theos` in accordance to `mgfs` with + # preceding and following frames: (o)|range of mgfs[[1]]|(o) + mfrs <- lapply(mgfs, function (x) as.integer(names(x))) mins <- lapply(mfrs, function (x) if (length(x)) min(x, na.rm = TRUE) else 0L) mins <- .Internal(unlist(mins, recursive = FALSE, use.names = FALSE)) maxs <- lapply(mfrs, function (x) if (length(x)) max(x, na.rm = TRUE) else 0L) @@ -90,58 +116,80 @@ pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, if (is.null(theos)) next - theos <- lapply(theos, function (x) x[, c("pep_seq", "mass")]) thfrs <- as.integer(names(theos)) - # separates into intervals (intersecting mgf_frames) + # separates into intervals (intersecting mgfs) anstheo[[i]] <- mapply(function (x, y) { theos[which(thfrs >= (x - 1L) & thfrs <= (y + 1L))] }, mins, maxs, SIMPLIFY = FALSE, USE.NAMES = FALSE) } + # rm(list = c("theos", "mfrs", "thfrs", "mins", "maxs")) - rm(list = c("theos", "mfrs", "thfrs", "mins", "maxs")) - - # (2) removes mgf_frames[[i]] not be found in anstheo[[i]] - for (i in seq_along(mgf_frames)) { - mi <- mgf_frames[[i]] - fmi <- names(mi) - ti <- lapply(anstheo, `[[`, i) # theos at chunk[[i]] (for all modules) + # (2) removes mgfs[[i]] not be found in anstheo[[i]] + for (i in seq_along(mgfs)) { + mi <- mgfs[[i]] + fi <- names(mi) + ti <- lapply(anstheo, `[[`, 
i) # theos at chunk[[i]] (for all modules) # expt frames found in (any) theo module - oks <- lapply(ti, function (x) fmi %in% names(x)) + oks <- lapply(ti, function (x) fi %in% names(x)) oks <- Reduce(`|`, oks) - mgf_frames[[i]] <- mi[oks] + mgfs[[i]] <- mi[oks] + } + # rm(list = c("mi", "ti", "fi", "oks")) + + if (length(mgfs) == 1L && !length(mgfs[[1]])) { + mgfs[1] <- list(NULL) } - rm(list = c("mi", "ti", "fmi", "oks")) # (3) removes unused frames of `anstheo` - # (mgf_frames determines the length of each anstheo[[i]]; - # more effective to first generate all bracketed mgf_frames indexes and apply + # (mgfs determines the length of each anstheo[[i]]; + # more effective to first generate all bracketed mgfs indexes and apply # the same set of indexes to each anstheo[[i]]) for (i in seq_along(anstheo)) - anstheo[[i]] <- mapply(subset_theoframes, mgf_frames, anstheo[[i]], + anstheo[[i]] <- mapply(subset_theoframes, mgfs, anstheo[[i]], SIMPLIFY = FALSE, USE.NAMES = FALSE) # (4) reverses the order (longer/heavier peptides towards the beginning) # do the difficult ones first when paralleling with LB ord <- rev(seq_along(anstheo[[1]])) - mgf_frames <- mgf_frames[ord] + mgfs <- mgfs[ord] anstheo <- lapply(anstheo, function (x) x[ord]) # (5) outputs (in chunks) if (by_modules) { for (i in seq_len(n_modules)) - qs::qsave(list(mgf_frames = mgf_frames, theopeps = anstheo[[i]]), - file.path(mgf_path, paste0("expttheo_", i, ".rds")), + qs::qsave(list(mgf_frames = mgfs, theopeps = anstheo[[i]]), + file.path(mgf_path, paste0("expttheo_", i, "_", notch, ".rds")), preset = "fast") } else { - for (i in seq_along(mgf_frames)) - qs::qsave(list(mgf_frames = mgf_frames[[i]], + for (i in seq_along(mgfs)) + qs::qsave(list(mgf_frames = mgfs[[i]], theopeps = lapply(anstheo, `[[`, i)), file.path(mgf_path, paste0("mgftheo_", i, ".rds")), preset = "fast") } + + # next version: note that mgfs are the same for different modules + # also save by fractions -> avoid sendData in parallel by read 
from disk + if (FALSE) { + if (by_modules) ( + # expt_notch_fraction.rds + # theo_module_notch_fraction.rds + mapply(function (data, frc) { + fi <- file.path(mgf_path, paste0("expt_", 0, "_", notch, "_", frc, ".rds")) + qs::qsave(data, fi, preset = "fast") + }, mgfs, seq_along(mgfs)) + ) + else { + for (i in seq_along(mgfs)) + qs::qsave(list(mgf_frames = mgfs[[i]], + theopeps = lapply(anstheo, `[[`, i)), + file.path(mgf_path, paste0("mgftheo_", i, ".rds")), + preset = "fast") + } + } invisible(NULL) } @@ -162,7 +210,7 @@ pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, #' modifications #' @param df0 An output template with zero rows #' @inheritParams matchMS -#' @inheritParams pair_mgftheo +#' @inheritParams pair_mgftheos hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all, mod_indexes, mgf_path, out_path, type_ms2ions = "by", maxn_vmods_per_pep = 5L, @@ -171,13 +219,11 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all, maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 10L, ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - by_modules = FALSE, df0 = NULL, digits = 4L) + by_modules = FALSE, df0 = NULL) { - pat <- if (by_modules) "^expttheo_" else "^mgftheo_" - mgths <- list.files(mgf_path, pattern = paste0(pat, "[0-9]+")) - ord <- order(as.integer(gsub(paste0(pat, "([0-9]+)\\.rds$"), "\\1", mgths))) - mgths <- mgths[ord] - + pat <- if (by_modules) "^expttheo" else "^mgftheo" + mgths <- order_fracs(type = pat, tempdir = mgf_path, by_modules = by_modules) + message("\n=== MS2 ion searches started at ", Sys.time(), ". 
===\n") n_cores <- detect_cores(96L) - 1L @@ -195,18 +241,17 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all, "vec_to_list", "split_matrix", "check_ms1_mass_vmods", "calc_ms2ions_a1_vnl0_fnl0", "calc_ms2ions_a1_vnl0_fnl1", "calc_ms2ions_a1_vnl1_fnl0", "ms2ions_by_type", - "byions", "czions", "axions", "bions_base", "yions_base", - "cions_base", "zions_base", "aions_base", "xions_base", - "find_ms2_bypep", "fuzzy_match_one", - "fuzzy_match_one2", "post_frame_adv"), + "byions", "czions", "axions", + "find_ms2_bypep", "post_frame_adv"), envir = environment(mzion::matchMS)) if (by_modules) { parallel::clusterExport(cl, c("frames_adv"), envir = environment(mzion::matchMS)) - + for (i in seq_along(aa_masses_all)) - ms2match_one( + hms2match_one( pep_mod_group = i, + mgths = mgths[[i]], aa_masses = aa_masses_all[[i]], FUN = funs_ms2[[i]], ms1vmods = ms1vmods_all[[i]], @@ -223,7 +268,7 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all, maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2, min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2, - df0 = df0, digits = digits) + df0 = df0) } else { message("Check search progress at: ", logs) @@ -250,8 +295,7 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all, ppm_ms2 = ppm_ms2, min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2, - df0 = df0, - digits = digits) + df0 = df0) } parallel::stopCluster(cl) @@ -277,7 +321,7 @@ ms2match_all <- function (mgth, aa_masses_all, funs_ms2, ms1vmods_all, maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 10L, ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - df0 = NULL, digits = 4L) + df0 = NULL) { msg <- paste0("Matching expt-theo pair: ", mgth) write(msg, stdout()) @@ -311,8 +355,7 @@ ms2match_all <- function (mgth, aa_masses_all, funs_ms2, ms1vmods_all, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2, min_ms2mass = 
min_ms2mass, - index_mgf_ms2 = index_mgf_ms2, - digits = digits) + index_mgf_ms2 = index_mgf_ms2) if (!dir.exists(tempdir <- file.path(out_path, "temp"))) create_dir(tempdir) @@ -371,10 +414,10 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 10L, ppm_ms2 = 10L, - min_ms2mass = 115L, index_mgf_ms2 = FALSE, - digits = 4L) + min_ms2mass = 115L, index_mgf_ms2 = FALSE) { lenm <- length(mgf_frames) + frames <- as.integer(names(mgf_frames)) if (!lenm) return(NULL) @@ -400,7 +443,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, ## --- initiation --- mgfs_cr <- mgf_frames[[1]] - frame <- mgfs_cr[["frame"]][1] + frame <- frames[1] thaf_ms2s <- thcr_ms2s <- thbf_ms2s <- thaf_peps <- thcr_peps <- thbf_peps <- @@ -441,8 +484,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -482,8 +524,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -530,8 +571,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -581,7 +621,7 @@ 
mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, # advance to the next frame mgfs_cr <- mgf_frames[[i+1]] - new_frame <- mgfs_cr[["frame"]][1] + new_frame <- frames[i+1L] if (isTRUE(new_frame == (frame + 1L))) { cri <- cri + 1L @@ -635,8 +675,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -680,8 +719,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -719,8 +757,7 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -737,93 +774,6 @@ mframes_adv <- function (mgf_frames = NULL, theopeps = NULL, } -#' Fuzzy matches with a +/-1 window. -#' -#' Not used but called the codes inside directly. -#' -#' @param x A vector to be matched. -#' @param y A vector to be matched against. 
-#' @importFrom fastmatch fmatch %fin% -#' @examples -#' library(mzion) -#' -#' ans1 <- mzion:::fuzzy_match_one(c(74953, 74955), rep(74954, 2)) -#' ans2 <- mzion:::fuzzy_match_one(c(74953, 74955), 74954) -#' -#' stopifnot(identical(ans1, ans2)) -#' stopifnot(ans1 == c(TRUE, TRUE)) -fuzzy_match_one <- function (x, y) -{ - mi <- x %fin% y - bf <- (x - 1L) %fin% y - af <- (x + 1L) %fin% y - - mi | bf | af -} - - -#' Fuzzy matches with a +/-1 window. -#' -#' No multiple dipping of \code{y} matches. A \code{y} value will be removed (or -#' became 0) if matched, -#' -#' @param x A vector to be matched. -#' @param y A vector to be matched against. -#' @importFrom fastmatch fmatch %fin% -#' @examples -#' library(mzion) -#' -#' ans1 <- mzion:::fuzzy_match_one2(c(74953, 74955), rep(74954, 2)) -#' ans2 <- mzion:::fuzzy_match_one2(c(74953, 74955), 74954) -#' -#' stopifnot(identical(ans1, ans2)) -#' stopifnot(ans1 == c(FALSE, TRUE)) -#' -#' ans3 <- mzion:::fuzzy_match_one2(c(74953, 74955, 80000), c(74955, 80000)) -#' -#' ## The x3 example from "find_ms2_bypep" -#' x <- c(-9185, -3369, -1973, -626, 59, 714, 3326, 7106, 7711, 7715, 8316, 8320, -#' 8916, 8920, 9511, 9515, 10102, 10688, 11211, 12945, 16807, 24001, 24481, -#' 31480, 32350, 32805, 37050, 37875, 42986, 53028, 53377, 53711, 56940, 58542, -#' 59172, 61310, 62482, 70941, 73801, 77575, 78046, 78047, 84120, 85881, 89313, -#' 91185, 96328, 101503, 102916, 104302, 113257, 113411, 116563, 118593, -#' 121336, 121405, 121474, 123450, 123841, 125826, 127823, 130750, 131786, -#' 131842, 131903, 134568, 135267, 135956, 139090, 139200, 146310, 146801, -#' 146902, 149442, 152081, 152174, 153544, 153635, 160913, 160995, 161078, -#' 162794, 162875, 163036, 163117, 163191, 163271, 168686, 169869, 169943, -#' 173741, 173812, 173951, 174856, 174922, 174990, 175059, 175128, 175197, -#' 175266) -#' -#' aas <- unlist(strsplit("SLAAEEEAAR", "")) -#' -#' y <- c(317.2022, 430.2863, 501.3234, 572.3605, 701.4031, -#' 830.4457, 959.4883, 
1030.5254, 1101.5625, 1257.6636, -#' 175.1190, 246.1561, 317.1932, 446.2358, 575.2784, -#' 704.3210, 775.3581, 846.3952, 959.4793, 1046.5113) -#' -#' names(y) <- c(aas, rev(aas)) -#' -#' ppm_ms2 <- 13L -#' min_ms2mass <- 115L -#' d <- ppm_ms2/1E6 -#' y <- ceiling(log(y/min_ms2mass)/log(1+d)) -#' -#' ans <- mzion:::fuzzy_match_one2(x, y) -fuzzy_match_one2 <- function (x, y) -{ - mi <- x %fin% y - if (any(mi)) y[y %fin% x[mi]] <- 0L - - x2 <- x - 1L - bf <- x2 %fin% y - if (any(bf)) y[y %fin% x2[bf]] <- 0L - - af <- (x + 1L) %fin% y - - mi | bf | af -} - - #' Helper: matches between theoretical and experimental MS2 ions. #' #' @param expts Numeric vector; one series of experimental MS2s. @@ -1031,13 +981,12 @@ fuzzy_match_one2 <- function (x, y) #' ex <- ceiling(log(expts/min_ms2mass)/log(1+d)) #' #' x5 <- mzion:::find_ms2_bypep(theos, expts, ex, d, ppm_ms2) -#' #' } #' #' @return Lists of (1) theo, (2) expt, (3) ith, (4) iex and (5) m. find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL, - ppm_ms2 = 10L, min_ms2mass = 115L, minn_ms2 = 6L, - index_mgf_ms2 = FALSE) + ppm_ms2 = 10L, min_ms2mass = 115L, minn_ms2 = 6L, + index_mgf_ms2 = FALSE) { # `theos` # the same pep_seq at different applicable ivmods and NLs @@ -1186,7 +1135,7 @@ find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL, #' value at \code{by_modules = TRUE} #' @inheritParams matchMS #' @inheritParams load_mgfs -#' @inheritParams pair_mgftheo +#' @inheritParams pair_mgftheos #' @examples #' \donttest{ #' library(mzion) @@ -1248,7 +1197,7 @@ search_mgf <- function (expt_mass_ms1 = NULL, expt_moverz_ms2 = NULL, # (1) within a list: removes vmods+ positions that are NULL (< minn_ms2) # (no effects on vmods-; need `type` info if to limit to vmods+) oks <- lapply(ans, function (this) { - oks <- lapply(this, function (x) !is.null(x$theo)) + oks <- lapply(this, function (x) !is.null(x[["theo"]])) .Internal(unlist(oks, recursive = FALSE, use.names = FALSE)) }) @@ 
-1325,6 +1274,92 @@ search_mgf <- function (expt_mass_ms1 = NULL, expt_moverz_ms2 = NULL, #' #' For a single module #' +#' @param pep_mod_group The index of peptide modification groups. +#' @param mgths Pairs of experimental and theoretical data. +#' @param aa_masses An amino-acid look-up. +#' @param FUN A function, e.g., \link{gen_ms2ions_base}, with an i-th module of +#' \code{aa_masses}. +#' @param ms1vmods All possible labels of MS1 variable modifications with +#' an i-th \code{aa_masses}. +#' @param ms2vmods All possible labels of MS2 variable modifications with +#' an i-t \code{aa_masses}. +#' @param cl The value of clusters for parallel processes. +#' @param df0 An output template. +#' @inheritParams hms2match +hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN, + ms1vmods, ms2vmods, cl, + mod_indexes, mgf_path, out_path, type_ms2ions = "by", + maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, + maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, + maxn_vmods_sitescombi_per_pep = 64L, + minn_ms2 = 6L, ppm_ms1 = 10L, ppm_ms2 = 10L, + min_ms2mass = 115L, index_mgf_ms2 = FALSE, + df0 = NULL) +{ + nm_fmods <- attr(aa_masses, "fmods", exact = TRUE) + nm_vmods <- attr(aa_masses, "vmods", exact = TRUE) + + message("Matching against: ", + if (nchar(nm_vmods) == 0L) nm_fmods else paste0(nm_fmods, " | ", nm_vmods)) + + df <- vector("list", length(mgths)) + + for (i in seq_along(mgths)) { + df[[i]] <- ms2match_one( + mgths[[i]], + pep_mod_group = pep_mod_group, aa_masses = aa_masses, FUN = FUN, + ms1vmods = ms1vmods, ms2vmods = ms2vmods, cl = cl, + mod_indexes = mod_indexes, mgf_path = mgf_path, out_path = out_path, + type_ms2ions = type_ms2ions, + maxn_vmods_per_pep = maxn_vmods_per_pep, + maxn_sites_per_vmod = maxn_sites_per_vmod, + maxn_fnl_per_seq = maxn_fnl_per_seq, + maxn_vnl_per_seq = maxn_vnl_per_seq, + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, + minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2, + min_ms2mass = min_ms2mass, 
index_mgf_ms2 = index_mgf_ms2, + df0 = df0) + } + + df <- dplyr::bind_rows(df) + + out_nm <- file.path(out_path, "temp", paste0("ion_matches_", pep_mod_group, ".rds")) + + if (is.null(df)) { + qs::qsave(df, out_nm, preset = "fast") + return(NULL) + } + + # fields not yet available with `ms2match_all` + df[["pep_fmod"]] <- nm_fmods + df[["pep_vmod"]] <- nm_vmods + df[["pep_mod_group"]] <- pep_mod_group + + df <- dplyr::rename(df, + pep_ret_range = ret_time, + pep_scan_title = scan_title, + pep_exp_mz = ms1_moverz, + pep_n_ms2 = ms2_n, + pep_exp_mr = ms1_mass, + pep_tot_int = ms1_int, + pep_scan_num = scan_num, + pep_exp_z = ms1_charge, + pep_ms2_moverzs = ms2_moverz, + pep_ms2_ints = ms2_int, + # pep_frame = frame, + ) + # df[["pep_scan_num"]] <- as.character(df[["pep_scan_num"]]) + df <- reloc_col_after(df, "raw_file", "scan_num") + df <- reloc_col_after(df, "pep_mod_group", "raw_file") + qs::qsave(df, out_nm, preset = "fast") +} + + +#' Matches experimentals and theoreticals +#' +#' For a single module +#' +#' @param mgth MGF and theoretical pairs #' @param pep_mod_group The index of peptide modification groups #' @param aa_masses An amino-acid look-up #' @param FUN A function, e.g., \link{gen_ms2ions_base}, with an i-th module of @@ -1336,7 +1371,7 @@ search_mgf <- function (expt_mass_ms1 = NULL, expt_moverz_ms2 = NULL, #' @param cl The value of clusters for parallel processes #' @param df0 An output template #' @inheritParams hms2match -ms2match_one <- function (pep_mod_group, aa_masses, FUN, +ms2match_one <- function (mgth, pep_mod_group, aa_masses, FUN, ms1vmods, ms2vmods, cl, mod_indexes, mgf_path, out_path, type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, @@ -1344,26 +1379,20 @@ ms2match_one <- function (pep_mod_group, aa_masses, FUN, maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 10L, ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - df0 = NULL, digits = 4L) + df0 = NULL) { - nm_fmods <- attr(aa_masses, 
"fmods", exact = TRUE) - nm_vmods <- attr(aa_masses, "vmods", exact = TRUE) - - message("Matching against: ", - if (nchar(nm_vmods) == 0L) nm_fmods else paste0(nm_fmods, " | ", nm_vmods)) - - mgth <- paste0("expttheo_", pep_mod_group, ".rds") out_name <- gsub("^expttheo", "ion_matches", mgth) mgftheo <- qs::qread(file.path(mgf_path, mgth)) mgf_frames <- mgftheo[["mgf_frames"]] theopeps <- mgftheo[["theopeps"]] - rm("mgftheo") + rm(list = "mgftheo") + + len <- length(mgf_frames) - if (!length(mgf_frames)) { - qs::qsave(df0, file.path(out_path, "temp", out_name)) + # NULL or NULL list + if ((!len) || (len == 1L && is.null(mgf_frames[[1]]))) return(df0) - } - + ntmod <- attr(aa_masses, "ntmod", exact = TRUE) ctmod <- attr(aa_masses, "ctmod", exact = TRUE) ntmass <- find_nterm_mass(aa_masses) @@ -1406,42 +1435,10 @@ ms2match_one <- function (pep_mod_group, aa_masses, FUN, ppm_ms2 = ppm_ms2, min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2, - digits = digits, FUN = FUN), .scheduling = "dynamic") - - df <- dplyr::bind_rows(df) - - out_nm <- file.path(out_path, "temp", paste0("ion_matches_", pep_mod_group, ".rds")) - if (is.null(df)) { - qs::qsave(df, out_nm, preset = "fast") - return(NULL) - } - - # fields not yet available with `ms2match_all` - df[["pep_fmod"]] <- nm_fmods - df[["pep_vmod"]] <- nm_vmods - df[["pep_mod_group"]] <- pep_mod_group - - df <- dplyr::rename(df, - pep_ret_range = ret_time, - pep_scan_title = scan_title, - pep_exp_mz = ms1_moverz, - pep_n_ms2 = ms2_n, - pep_exp_mr = ms1_mass, - pep_tot_int = ms1_int, - pep_scan_num = scan_num, - pep_exp_z = ms1_charge, - pep_ms2_moverzs = ms2_moverz, - pep_ms2_ints = ms2_int, - pep_frame = frame) - df[["pep_scan_num"]] <- as.character(df[["pep_scan_num"]]) - - df <- reloc_col_after(df, "raw_file", "scan_num") - df <- reloc_col_after(df, "pep_mod_group", "raw_file") - - qs::qsave(df, out_nm, preset = "fast") + df <- dplyr::bind_rows(df) } @@ -1469,7 +1466,7 @@ ms2match_one <- function (pep_mod_group, 
aa_masses, FUN, #' @param FUN A function pointer to, e.g., \link{gen_ms2ions_base}. #' @inheritParams matchMS #' @inheritParams ms2match -#' @inheritParams ms2match_one +#' @inheritParams hms2match_one #' @return Matches to each MGF as a list elements. The length of the output is #' equal to the number of MGFs in the given frame. frames_adv <- function (mgf_frames = NULL, theopeps = NULL, @@ -1484,18 +1481,19 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 10L, ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - digits = 4L, FUN) + FUN) { len <- length(mgf_frames) if (!len) return(NULL) + frames <- as.integer(names(mgf_frames)) out <- vector("list", len) ## --- initiation --- mgfs_cr <- mgf_frames[[1]] - frame <- mgfs_cr[["frame"]][1] + frame <- frames[1] bfi <- 1L thbf <- theopeps[[bfi]] @@ -1527,8 +1525,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -1556,9 +1553,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -1600,9 +1595,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = 
maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -1633,7 +1626,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, # advance to the next frame mgfs_cr <- mgf_frames[[i+1]] - new_frame <- mgfs_cr[["frame"]][1] + new_frame <- frames[i+1L] if (isTRUE(new_frame == (frame + 1L))) { cri <- cri + 1L @@ -1675,9 +1668,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -1714,9 +1705,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE @@ -1741,9 +1730,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep ), SIMPLIFY = FALSE, USE.NAMES = FALSE diff --git a/R/msmsmatches.R b/R/msmsmatches.R index 9fa10cf..66f46f8 100644 --- a/R/msmsmatches.R +++ b/R/msmsmatches.R @@ -59,6 +59,9 @@ #' \code{TMT11plex} for TMT-11 and (2) \code{TMT16plex} for TMTpro. See also #' \link{parse_unimod} for grammars of modification \code{title}, #' \code{position} and \code{site}. 
+#' @param rm_dup_term_anywhere Logical; if TRUE, removes combinations in +#' variable modifications with site(s) in positions of both terminal and +#' anywhere, e.g., "Gln->pyro-Glu (N-term = Q)" and "Deamidated (Q). #' @param fixedlabs Character string(s) of fixed isotopic labels. See examples #' of SILAC for details. Can be but not typically used in standard alone #' searches of labeled residues. @@ -165,14 +168,12 @@ #' interrogation. The default is 110. #' @param max_ms2mass A positive integer; the maximum MS2 mass for #' interrogation. -#' @param n_13c A non-negative integer; the maximum number of 13C off-sets for -#' consideration in MS1 masses. The default is 0 with no off-sets. -#' Peak-pickings by various MGF conversion tools may have attempted to adjust -#' precursor masses to the corresponding mono-isotopic masses in isotope -#' envelopes. Nevertheless, by setting \code{n_13c = 1}, some increases in the -#' number of PSMs may be readily achieved at a relatively small cost of search -#' time. -#' @param par_groups A low -priority feature. Parameter(s) of \code{matchMS} +#' @param n_13c Number(s) of 13C off-sets in precursor masses, for example, over +#' the range of \code{-1:2}. The default is 0. +#' @param ms1_notches A numeric vector; notches (off-sets) in precursor masses, +#' e.g., \code{c(-79.966331, -97.976896)} to account fo the loss of a phospho +#' group and phosphoric acid in precursor masses. +#' @param par_groups A low-priority feature. Parameter(s) of \code{matchMS} #' multiplied by sets of values in groups. Multiple searches will be performed #' separately against the parameter groups. For instance with one set of #' samples in SILAC light and the other in SILAC heavy, the experimenters may @@ -302,18 +303,18 @@ #' \code{max_protnpep_co = Inf} to learn automatically the cut-off from data. #' Note that the the value of \code{prot_n_pep} includes the counts of shared #' peptides. 
-#' @param method_prot_es_co A character string; the method to calculate the -#' cut-offs of protein enrichment scores. The value is in one of \code{ -#' "median", "mean", "max", "min"} with the default of \code{"median"}. For -#' instance at the default, the median of \code{peptide_score - -#' pep_score_cutoff} under a protein will be used to represent the threshold -#' of a protein enrichment score. For more conserved thresholds, the -#' statistics of \code{"max"} may be considered. -#' @param soft_secions Impacts on search performance not yet assessed. Logical; -#' if TRUE, collapses the intensities of secondary ions to primary ions even -#' when the primaries are absent. The default is FALSE. For instance, the -#' signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -#' matched. +#' @param method_prot_es_co A low-priority setting. A character string; the +#' method to calculate the cut-offs of protein enrichment scores. The value is +#' in one of \code{"median", "mean", "max", "min"} with the default of +#' \code{"median"}. For instance at the default, the median of +#' \code{peptide_score - pep_score_cutoff} under a protein will be used to +#' represent the threshold of a protein enrichment score. For more conserved +#' thresholds, the statistics of \code{"max"} may be considered. +#' @param soft_secions Logical; if TRUE, collapses the intensities of secondary +#' ions to primary ions even when the primaries are absent. The default is +#' FALSE. For instance, the signal of \code{b5^*} will be ignored if its +#' primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +#' TRUE} on search performance has not yet been assessed. #' @param topn_seqs_per_query Positive integer; a threshold to discard peptide #' matches under the same MS query with scores beyond the top-n. #' @@ -418,10 +419,10 @@ #' suggested. 
Occasionally experimenters may remove the file folder for disk #' space or under infrequent events of modified framework incurred by the #' developer. -#' @param by_modules Logical. Experimenting. At the TRUE default, searches MS -#' data by individual modules of combinatorial fixed and variable -#' modifications. If FALSE, search all modules together. The later would -#' probably need more than 32G RAM if the number of modules is over 96. +#' @param by_modules Not used. Logical. At the TRUE default, searches MS data by +#' individual modules of combinatorial fixed and variable modifications. If +#' FALSE, search all modules together. The later would probably need more than +#' 32G RAM if the number of modules is over 96. #' @param digits A non-negative integer; the number of decimal places to be #' used. The default is 4. #' @param ... Not currently used. @@ -461,7 +462,6 @@ #' out_path = "~/mzion/examples", #' ) #' -#' #' # TMT-16plex, phospho #' matchMS( #' fixedmods = c("TMTpro (N-term)", "TMTpro (K)", "Carbamidomethyl (C)"), @@ -474,6 +474,16 @@ #' out_path = "~/mzion/examples", #' ) #' +#' # TMT-18plex +#' matchMS( +#' fixedmods = c("TMTpro (N-term)", "TMTpro (K)", "Carbamidomethyl (C)"), +#' varmods = c("Acetyl (Protein N-term)", "Oxidation (M)", +#' "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), +#' quant = "tmt18", +#' fdr_type = "psm", +#' out_path = "~/mzion/examples", +#' ) +#' #' # Bruker's PASEF #' matchMS( #' fixedmods = c("Carbamidomethyl (C)"), @@ -648,6 +658,7 @@ matchMS <- function (out_path = "~/mzion/outs", varmods = c("Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + rm_dup_term_anywhere = TRUE, fixedlabs = NULL, varlabs = NULL, locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)"), @@ -678,6 +689,7 @@ matchMS <- function (out_path = "~/mzion/outs", min_mass = 200L, max_mass = 4500L, ppm_ms1 = 20L, n_13c = 0L, + ms1_notches = 0, par_groups = NULL, silac_mix = NULL, @@ -710,7 +722,7 @@ matchMS 
<- function (out_path = "~/mzion/outs", combine_tier_three = FALSE, max_n_prots = 60000L, use_ms1_cache = TRUE, - .path_cache = "~/mzion/.MSearches (1.2.6)/Cache/Calls", + .path_cache = "~/mzion/.MSearches (1.2.7)/Cache/Calls", .path_fasta = NULL, topn_ms2ions = 100L, @@ -802,7 +814,7 @@ matchMS <- function (out_path = "~/mzion/outs", stopifnot(vapply(c(soft_secions, combine_tier_three, calib_ms1mass, use_ms1_cache, add_ms2theos, add_ms2theos2, add_ms2moverzs, add_ms2ints, exclude_reporter_region, index_mgf_ms2, - svm_cv), + svm_cv, rm_dup_term_anywhere), is.logical, logical(1L))) # numeric types @@ -868,9 +880,8 @@ matchMS <- function (out_path = "~/mzion/outs", stopifnot(min_len >= 1L, max_len >= min_len, max_miss <= 10L, minn_ms2 >= 2L, min_mass >= 1L, max_mass >= min_mass, min_ms2mass >= 1L, max_ms2mass > min_ms2mass, - # maxn_fnl_per_seq >= 2L, maxn_vnl_per_seq >= 2L, maxn_vmods_sitescombi_per_pep >= 2L, - n_13c >= 0L, noenzyme_maxn >= 0L, + noenzyme_maxn >= 0L, maxn_vmods_per_pep >= maxn_sites_per_vmod, max_n_prots > 1000L, min_ms1_charge >= 1L, max_ms1_charge >= min_ms1_charge, min_scan_num >= 1L, max_scan_num >= min_scan_num, @@ -1049,13 +1060,11 @@ matchMS <- function (out_path = "~/mzion/outs", if (length(par_groups)) { if ("out_path" %in% names(par_groups)) stop("Do not include `out_path` in `par_groups`.\n", - "The same parent `out_path` is assumed.", - call. = FALSE) + "The same parent `out_path` is assumed.") if ("fasta" %in% names(par_groups)) stop("Do not include `fasta` in `par_groups`.\n", - "The same set of `fasta` files is assumed.", - call. = FALSE) + "The same set of `fasta` files is assumed.") grp_args <- local({ nms <- lapply(par_groups, names) @@ -1064,15 +1073,14 @@ matchMS <- function (out_path = "~/mzion/outs", if (!identical(nms_1, all_nms)) stop("Not all names are identical to those in the first group: ", - paste(nms_1, collapse = ", "), - call. = FALSE) + paste(nms_1, collapse = ", ")) fargs <- formalArgs(fun) bads <- nms_1[! 
nms_1 %in% fargs] if (length(bads)) stop("Arguments in `par_groups` not defined in `", fun, "`:\n ", - paste(bads, collapse = ", "), call. = FALSE) + paste(bads, collapse = ", ")) cargs <- names(this_call) cargs <- cargs[cargs != ""] @@ -1080,7 +1088,7 @@ matchMS <- function (out_path = "~/mzion/outs", if (length(dups)) stop("Arguments in `par_groups` already in the call", ":\n ", - paste(dups, collapse = ", "), call. = FALSE) + paste(dups, collapse = ", ")) nms_1 }) @@ -1156,10 +1164,10 @@ matchMS <- function (out_path = "~/mzion/outs", } ## Theoretical MS1 masses - bypass_pepmasses <- dots$bypass_pepmasses - if (is.null(bypass_pepmasses)) bypass_pepmasses <- FALSE + if (is.null(bypass_pepmasses <- dots$bypass_pepmasses)) + bypass_pepmasses <- FALSE - if (!bypass_pepmasses) { + if (!bypass_pepmasses) res <- calc_pepmasses2( aa_masses = aa_masses, fasta = fasta, @@ -1167,6 +1175,7 @@ matchMS <- function (out_path = "~/mzion/outs", acc_pattern = acc_pattern, fixedmods = fixedmods, varmods = varmods, + rm_dup_term_anywhere = rm_dup_term_anywhere, fixedlabs = fixedlabs, varlabs = varlabs, mod_motifs = mod_motifs, @@ -1182,50 +1191,54 @@ matchMS <- function (out_path = "~/mzion/outs", max_miss = max_miss, min_mass = min_mass, max_mass = max_mass, - n_13c = n_13c, out_path = out_path, digits = digits, use_ms1_cache = use_ms1_cache, .path_cache = .path_cache, .path_fasta = .path_fasta, - .path_ms1masses = .path_ms1masses - ) - } + .path_ms1masses = .path_ms1masses) ## Bin theoretical peptides - bypass_bin_ms1 <- dots$bypass_bin_ms1 - if (is.null(bypass_bin_ms1)) bypass_bin_ms1 <- FALSE + if (is.null(bypass_bin_ms1 <- dots$bypass_bin_ms1)) + bypass_bin_ms1 <- FALSE reframe_mgfs <- calib_ms1mass && ppm_ms1calib != ppm_ms1 if (!bypass_bin_ms1) { - bin_ms1masses(res = res, - min_mass = min_mass, - max_mass = max_mass, - ppm_ms1 = ppm_ms1, - use_ms1_cache = use_ms1_cache, - .path_cache = .path_cache, - .path_ms1masses = .path_ms1masses, - out_path = out_path) - - if 
(reframe_mgfs) { + .path_bin <- bin_ms1masses(res = res, min_mass = min_mass, max_mass = max_mass, - ppm_ms1 = ppm_ms1calib, + min_len = min_len, + max_len = max_len, + ppm_ms1 = ppm_ms1, use_ms1_cache = use_ms1_cache, .path_cache = .path_cache, .path_ms1masses = .path_ms1masses, + enzyme = enzyme, out_path = out_path) - } - try(rm(list = "res"), silent = TRUE) - gc() + if (reframe_mgfs) + .path_bin_calib <- + bin_ms1masses(res = res, + min_mass = min_mass, + max_mass = max_mass, + min_len = min_len, + max_len = max_len, + ppm_ms1 = ppm_ms1calib, + use_ms1_cache = use_ms1_cache, + .path_cache = .path_cache, + .path_ms1masses = .path_ms1masses, + enzyme = enzyme, + out_path = out_path) + + if (exists("res")) + rm(list = "res") } ## MGFs - bypass_mgf <- dots$bypass_mgf - if (is.null(bypass_mgf)) bypass_mgf <- FALSE + if (is.null(bypass_mgf <- dots$bypass_mgf)) + bypass_mgf <- FALSE if (!bypass_mgf) load_mgfs(out_path = out_path, @@ -1254,8 +1267,8 @@ matchMS <- function (out_path = "~/mzion/outs", digits = digits) ## MSMS matches - bypass_ms2match <- dots$bypass_ms2match - if (is.null(bypass_ms2match)) bypass_ms2match <- FALSE + if (is.null(bypass_ms2match <- dots$bypass_ms2match)) + bypass_ms2match <- FALSE .time_stamp <- find_ms1_times(out_path) @@ -1276,9 +1289,9 @@ matchMS <- function (out_path = "~/mzion/outs", mod_indexes <- NULL } - if (calib_ms1mass) { + if (calib_ms1mass) calib_mgf(mgf_path = mgf_path, aa_masses_all = aa_masses_all[1], # base - out_path = out_path, + out_path = out_path, .path_bin = .path_bin_calib, mod_indexes = mod_indexes[names(mod_indexes) %in% fixedmods], type_ms2ions = type_ms2ions, maxn_vmods_per_pep = maxn_vmods_per_pep, @@ -1298,16 +1311,16 @@ matchMS <- function (out_path = "~/mzion/outs", enzyme = enzyme, maxn_fasta_seqs = maxn_fasta_seqs, maxn_vmods_setscombi = maxn_vmods_setscombi, min_len = min_len, max_len = max_len, max_miss = max_miss, - knots = 50L, digits = digits) - } + knots = 50L) if (!bypass_ms2match) { if (min_ms2mass 
< 5L) warning("Maybe out of RAM at \"min_ms2mass < 5L\".") - + ms2match(mgf_path = mgf_path, aa_masses_all = aa_masses_all, out_path = out_path, + .path_bin = .path_bin, mod_indexes = mod_indexes, type_ms2ions = type_ms2ions, maxn_vmods_per_pep = maxn_vmods_per_pep, @@ -1326,6 +1339,8 @@ matchMS <- function (out_path = "~/mzion/outs", ppm_reporters = ppm_reporters, index_mgf_ms2 = index_mgf_ms2, by_modules = by_modules, + n_13c = n_13c, + ms1_notches = ms1_notches, # dummy for argument matching fasta = fasta, @@ -1339,23 +1354,22 @@ matchMS <- function (out_path = "~/mzion/outs", maxn_vmods_setscombi = maxn_vmods_setscombi, min_len = min_len, max_len = max_len, - max_miss = max_miss, - digits = digits) + max_miss = max_miss) } ## Peptide scores - bypass_from_pepscores <- dots$bypass_from_pepscores - if (is.null(bypass_from_pepscores)) bypass_from_pepscores <- FALSE + if (is.null(bypass_from_pepscores <- dots$bypass_from_pepscores)) + bypass_from_pepscores <- FALSE if (bypass_from_pepscores) return(NULL) - bypass_pepscores <- dots$bypass_pepscores - if (is.null(bypass_pepscores)) bypass_pepscores <- FALSE + if (is.null(bypass_pepscores <- dots$bypass_pepscores)) + bypass_pepscores <- FALSE if (!bypass_pepscores) { - tally_ms2ints <- dots$tally_ms2ints - if (is.null(tally_ms2ints)) tally_ms2ints <- TRUE + if (is.null(tally_ms2ints <- dots$tally_ms2ints)) + tally_ms2ints <- TRUE calc_pepscores(topn_ms2ions = topn_ms2ions, type_ms2ions = type_ms2ions, @@ -1395,8 +1409,8 @@ matchMS <- function (out_path = "~/mzion/outs", digits = digits) } - bypass_primatches <- dots$bypass_primatches - if (is.null(bypass_primatches)) bypass_primatches <- FALSE + if (is.null(bypass_primatches <- dots$bypass_primatches)) + bypass_primatches <- FALSE if (!bypass_primatches) hadd_primatches(out_path = out_path, @@ -1408,8 +1422,8 @@ matchMS <- function (out_path = "~/mzion/outs", index_mgf_ms2 = index_mgf_ms2) ## Peptide FDR - bypass_pepfdr <- dots$bypass_pepfdr - if 
(is.null(bypass_pepfdr)) bypass_pepfdr <- FALSE + if (is.null(bypass_pepfdr <- dots$bypass_pepfdr)) + bypass_pepfdr <- FALSE if (!bypass_pepfdr) { prob_cos <- calc_pepfdr(target_fdr = target_fdr, @@ -1448,8 +1462,8 @@ matchMS <- function (out_path = "~/mzion/outs", } ## Peptide ranks and score deltas between `pep_ivmod` - bypass_peploc <- dots$bypass_peploc - if (is.null(bypass_peploc)) bypass_peploc <- FALSE + if (is.null(bypass_peploc <- dots$bypass_peploc)) + bypass_peploc <- FALSE if (!bypass_peploc) { calc_peploc(out_path = out_path, @@ -1457,18 +1471,18 @@ matchMS <- function (out_path = "~/mzion/outs", locmods = locmods, topn_mods_per_seq = topn_mods_per_seq, topn_seqs_per_query = topn_seqs_per_query) - gc() } ## Protein accessions - bypass_from_protacc <- dots$bypass_from_protacc - if (is.null(bypass_from_protacc)) bypass_from_protacc <- FALSE + if (is.null(bypass_from_protacc <- dots$bypass_from_protacc)) + bypass_from_protacc <- FALSE if (bypass_from_protacc) return(NULL) + + if (is.null(bypass_protacc <- dots$bypass_protacc)) + bypass_protacc <- FALSE - bypass_protacc <- dots$bypass_protacc - if (is.null(bypass_protacc)) bypass_protacc <- FALSE temp_dir <- file.path(out_path, "temp") file_protacc <- file.path(temp_dir, "df_protacc.rds") @@ -1501,8 +1515,8 @@ matchMS <- function (out_path = "~/mzion/outs", rm(list = "file_protacc") ## Protein FDR - bypass_protfdr <- dots$bypass_protfdr - if (is.null(bypass_protfdr)) bypass_protfdr <- FALSE + if (is.null(bypass_protfdr <- dots$bypass_protfdr)) + bypass_protfdr <- FALSE file_protfdr <- file.path(temp_dir, "df_protfdr.rds") @@ -1520,12 +1534,11 @@ matchMS <- function (out_path = "~/mzion/outs", } df <- add_rptrs(df, quant, out_path) - gc() ## Clean-ups # (raw_file etc. 
already mapped if `from_group_search`) - from_group_search <- dots$from_group_search - if (!isTRUE(from_group_search)) df <- map_raw_n_scan(df, mgf_path) + if (!isTRUE(from_group_search <- dots$from_group_search)) + df <- map_raw_n_scan(df, mgf_path) df <- dplyr::mutate(df, pep_expect = 10^((pep_score_co - pep_score)/10) * target_fdr) df[["pep_score_co"]] <- NULL @@ -1550,12 +1563,14 @@ matchMS <- function (out_path = "~/mzion/outs", rm(list = c("cols_tmt", "rows_tmt")) local({ + df$pep_exp_mz <- round(df$pep_exp_mz, digits = 4L) df$pep_exp_mr <- round(df$pep_exp_mr, digits = 4L) df$pep_calc_mr <- round(df$pep_calc_mr, digits = 4L) df$pep_delta <- round(df$pep_delta, digits = 4L) + df$pep_tot_int <- round(df$pep_tot_int, digits = 1L) df$pep_expect <- format(df$pep_expect, digits = 3L) - readr::write_tsv(df, file.path(out_path, "psmC.txt")) + readr::write_tsv(df, file.path(out_path, "psmC.txt")) session_info <- sessionInfo() save(session_info, file = file.path(out_path, "Calls", "mzion.rda")) }) @@ -1565,7 +1580,6 @@ matchMS <- function (out_path = "~/mzion/outs", "prot_issig", "prot_n_pep")] df <- dplyr::filter(df, pep_issig, !pep_isdecoy, !grepl("^-", prot_acc)) - gc() df <- try_psmC2Q(df, out_path = out_path, @@ -1602,20 +1616,17 @@ try_psmC2Q <- function (df = NULL, out_path = NULL, fdr_type = "protein", stop() } - if (n_peps > 1000000L && n_prots > 100000L) { + if (n_peps > 1000000L && n_prots > 100000L) df <- NA - } - else { + else df <- tryCatch( psmC2Q(df, out_path = out_path, fdr_type = fdr_type, combine_tier_three = combine_tier_three, max_n_prots = max_n_prots), - error = function(e) NA - ) - } - + error = function(e) NA) + if (length(df) == 1L && is.na(df)) { message("Retry with a new R session: \n\n", "Manual execution of the following codes if not start automatically.\n\n", @@ -1646,10 +1657,9 @@ try_psmC2Q <- function (df = NULL, out_path = NULL, fdr_type = "protein", } else { suppressWarnings( - rm(list = c(".path_cache", ".path_ms1masses", 
".time_stamp"), - envir = .GlobalEnv) - ) - + rm(list = c(".path_cache", ".path_ms1masses", ".time_stamp"), + envir = .GlobalEnv)) + message("Done.") } @@ -1789,8 +1799,7 @@ psmC2Q <- function (df = NULL, out_path = NULL, fdr_type = "protein", df <- unique(df [, c("prot_acc", "pep_seq")]) df2 <- unique(df2[, c("prot_acc", "pep_seq")]) df3 <- unique(df3[, c("prot_acc", "pep_seq")]) - gc() - + nms <- c("prot_acc", "pep_seq", "prot_isess", "prot_hit_num", "prot_family_member", "pep_literal_unique", "pep_razor_unique") @@ -1824,7 +1833,6 @@ psmC2Q <- function (df = NULL, out_path = NULL, fdr_type = "protein", df3 <- post_psmC2Q(df3, dfC, tier = 3L) rm(list = "dfC") - gc() # Three-tier combines nms_df <- names(df) @@ -1902,29 +1910,29 @@ post_psmC2Q <- function (df, dfC, tier = NULL) df <- dplyr::bind_cols( df[, ord_prots, drop = FALSE], - df[, !names(df) %in% ord_prots, drop = FALSE] - ) + df[, !names(df) %in% ord_prots, drop = FALSE]) ord_peps <- c("pep_seq", "pep_issig", "pep_literal_unique", "pep_razor_unique", "pep_score", "pep_expect") df <- dplyr::bind_cols( df[, ord_peps, drop = FALSE], - df[, !names(df) %in% ord_peps, drop = FALSE] - ) - + df[, !names(df) %in% ord_peps, drop = FALSE]) + df <- dplyr::bind_cols( df[grepl("^prot_", names(df))], df[grepl("^pep_", names(df))], df[grepl("^psm_", names(df))], - df[!grepl("^prot_|^pep_|^psm_", names(df))], - ) + df[!grepl("^prot_|^pep_|^psm_", names(df))]) + + df$pep_exp_mz <- round(df$pep_exp_mz, digits = 4L) df$pep_exp_mr <- round(df$pep_exp_mr, digits = 4L) df$pep_calc_mr <- round(df$pep_calc_mr, digits = 4L) df$pep_delta <- round(df$pep_delta, digits = 4L) + df$pep_tot_int <- round(df$pep_tot_int, digits = 1L) df$pep_expect <- format(df$pep_expect, digits = 3L) - + df <- dplyr::select(df, -which(names(df) %in% c("prot_n_psm", "prot_n_pep"))) } @@ -1970,36 +1978,34 @@ check_tmt_pars <- function (fixedmods, varmods, quant) fvmods <- c(fixedmods, varmods) - if (grepl("^tmt[0-9]+", quant)) { - possibles <- 
fvmods[grepl("^TMT", fvmods)] + if (!grepl("^tmt[0-9]+", quant)) + return(NULL) + + tmts <- fvmods[grepl("^TMT", fvmods)] + + if (quant == "tmt18") { + ok <- all(grepl("TMTpro18.* |TMT18plex.* ", tmts)) - if (quant == "tmt18") { - ok <- all(grepl("TMTpro18.* |TMT18plex.* ", possibles)) - - if (!ok) - warning("All TMT modifications need to be `TMTpro18` or `TMT18plex` at `", - quant, "`.\n", - tmt_msg_1, "\n", tmt_msg_2, "\n", tmt_msg_3, - call. = FALSE) - } - else if (quant == "tmt16") { - ok <- all(grepl("TMTpro.* |TMT16plex.* ", possibles)) - - if (!ok) - warning("All TMT modifications need to be `TMTpro` or `TMT16plex` at `", - quant, "`.\n", - tmt_msg_1, "\n", tmt_msg_2, "\n", tmt_msg_3, - call. = FALSE) - } - else { - ok <- all(grepl("TMT6plex.* |TMT10plex.* |TMT11plex.* ", possibles)) - - if (!ok) - warning("All TMT modifications need to be `TMT6plex`, `TMT10plex` or `TMT11plex` at `", - quant, "`.\n", - tmt_msg_1, "\n", tmt_msg_2, "\n", tmt_msg_3, - call. = FALSE) - } + if (!ok) + warning("All TMT modifications need to be `TMTpro18` or `TMT18plex` at `", + quant, "`.\n", + tmt_msg_1, "\n", tmt_msg_2, "\n", tmt_msg_3) + } + else if (quant == "tmt16") { + ok <- all(grepl("TMTpro.* |TMT16plex.* ", tmts)) + + if (!ok) + warning("All TMT modifications need to be `TMTpro` or `TMT16plex` at `", + quant, "`.\n", + tmt_msg_1, "\n", tmt_msg_2, "\n", tmt_msg_3) + } + else { + ok <- all(grepl("TMT6plex.* |TMT10plex.* |TMT11plex.* ", tmts)) + + if (!ok) + warning("All TMT modifications need to be `TMT6plex`, `TMT10plex` or `TMT11plex` at `", + quant, "`.\n", + tmt_msg_1, "\n", tmt_msg_2, "\n", tmt_msg_3) } invisible(NULL) @@ -2020,7 +2026,7 @@ checkMGF <- function (mgf_path = NULL, grp_args = NULL, error = c("stop", "warn" stop("`error` needs to be one of \"error\" or \"stop\".") if (is.null(mgf_path)) - stop("`mgf_path` not found.", call. 
= FALSE) + stop("`mgf_path` not found.") fi_mgf <- list.files(path = file.path(mgf_path), pattern = "^.*\\.mgf$") fi_mzml <- list.files(path = file.path(mgf_path), pattern = "^.*\\.mzML$") @@ -2032,9 +2038,9 @@ checkMGF <- function (mgf_path = NULL, grp_args = NULL, error = c("stop", "warn" if (!(len_mgf || len_mzml)) { if (error == "warn") - warning("No `.mgf` files immediately under ", mgf_path, call. = FALSE) + warning("No `.mgf` files immediately under ", mgf_path) else - stop("No `.mgf` files immediately under ", mgf_path, call. = FALSE) + stop("No `.mgf` files immediately under ", mgf_path) } invisible(mgf_path) @@ -2128,17 +2134,12 @@ check_fdr_group <- function (fdr_group = c("base", "all", "top3"), if (is_trivial) return(oks[[1]]) - fdr_group <- unique(fdr_group) - - len <- length(fdr_group) - - if (len > 1L) { - if (all(fdr_group %in% oks)) - fdr_group <- oks[1] - else - fdr_group <- fdr_group[!fdr_group %in% oks] - } + len <- length(fdr_group <- unique(fdr_group)) + oks2 <- fdr_group %in% oks + if (len > 1L) + fdr_group <- if (all(oks2)) oks[1] else fdr_group[!oks2] + as.character(fdr_group) } diff --git a/R/msmsmatches2.R b/R/msmsmatches2.R index f8083c1..78a6367 100644 --- a/R/msmsmatches2.R +++ b/R/msmsmatches2.R @@ -7,6 +7,7 @@ #' fixed and variable modifications. #' @param mod_indexes Integer; the indexes of fixed and/or variable #' modifications. +#' @param .path_bin The file path to binned precursor masses. #' @param reframe_mgfs Logical; if TRUE, recalculates the frame indexes of MGFs. #' @param first_search Logical; is the first search (for MGF mass calibration) #' or not. 
@@ -16,7 +17,7 @@ #' @inheritParams frames_adv #' @inheritParams add_var_masses #' @import parallel -ms2match <- function (mgf_path, aa_masses_all, out_path, +ms2match <- function (mgf_path, aa_masses_all, out_path, .path_bin, mod_indexes, type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 64L, maxn_vnl_per_seq = 64L, maxn_vmods_sitescombi_per_pep = 64L, @@ -24,7 +25,8 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, min_mass = 200L, max_mass = 4500L, min_ms2mass = 115L, quant = "none", ppm_reporters = 10L, by_modules = TRUE, reframe_mgfs = FALSE, - + n_13c = NULL, ms1_notches = 0, + # dummies fasta, acc_type, acc_pattern, topn_ms2ions, fixedmods, varmods, @@ -33,7 +35,7 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, min_len, max_len, max_miss, index_mgf_ms2 = FALSE, first_search = FALSE, - .savecall = TRUE, digits = 4L) + .savecall = TRUE) { options(digits = 9L) @@ -89,8 +91,6 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, "^mgf$", "^mgfs$", "Calls")) # pairs expts and theos - .path_bin <- get(".path_bin", envir = .GlobalEnv, inherits = FALSE) - files_a <- list.files(mgf_path, pattern = "^expttheo_", full.names = TRUE) files_b <- list.files(mgf_path, pattern = "^mgftheo_", full.names = TRUE) nfiles_a <- length(files_a) @@ -107,13 +107,13 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, # (matches of secondary ions may use `outer` products and no adjustments) ppm_ms1_bin <- calc_threeframe_ppm(ppm_ms1) ppm_ms2_bin <- calc_threeframe_ppm(ppm_ms2) + ms1_offsets <- find_ms1_offsets(n_13c = n_13c, ms1_notches = ms1_notches) - # if (first_search) subset mgf - - pair_mgftheo(mgf_path = mgf_path, n_modules = length(aa_masses_all), - .path_bin = .path_bin, by_modules = by_modules, - reframe_mgfs = reframe_mgfs, min_mass = min_mass, - ppm_ms1_bin = ppm_ms1_bin, first_search = first_search) + pair_mgftheos(mgf_path = mgf_path, n_modules = length(aa_masses_all), + ms1_offsets = 
ms1_offsets, by_modules = by_modules, + min_mass = min_mass, max_mass = max_mass, + ppm_ms1_bin = ppm_ms1_bin, .path_bin = .path_bin, + reframe_mgfs = reframe_mgfs, first_search = first_search) rm(list = c("files_a", "files_b", "nfiles_a", "nfiles_b")) @@ -151,7 +151,7 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, ms2_n = integer(), frame = numeric(), matches = list(list()), pep_isdecoy = logical()) - + hms2match(aa_masses_all = aa_masses_all, funs_ms2 = funs_ms2, ms1vmods_all = ms1vmods_all, @@ -171,8 +171,7 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2, by_modules = by_modules, - df0 = df0, - digits = digits) + df0 = df0) qs::qsave(aa_masses_all, faa) @@ -232,12 +231,13 @@ reverse_seqs <- function (seqs) #' #' @param aa_masses_all List(1); The first list of all amino-acid look-ups. #' @param mod_indexes Integer; the indexes of fixed and/or variable -#' modifications +#' modifications. +#' @param .path_bin The file path to binned precursor masses. #' @param reframe_mgfs Logical; if TRUE, recalculates the frame indexes of MGFs #' @param knots The number of knots for spline fits. 
#' @inheritParams matchMS calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, - mod_indexes = NULL, type_ms2ions = "by", + .path_bin, mod_indexes = NULL, type_ms2ions = "by", maxn_vmods_per_pep = 5L,maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, @@ -249,8 +249,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, acc_pattern = NULL, topn_ms2ions = 100L, fixedmods = NULL, varmods = NULL, enzyme = "trypsin_p", maxn_fasta_seqs = 200000L, maxn_vmods_setscombi = 512L, - min_len = 7L, max_len = 40L, max_miss = 2L, knots = 50L, - digits = 4L) + min_len = 7L, max_len = 40L, max_miss = 2L, knots = 50L) { on.exit( if (exists(".savecall", envir = fun_env)) { @@ -262,12 +261,8 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, fun_env <- environment() args <- names(formals(fun)) args_except <- NULL + args_must <- if (length(args_except)) args[!args %in% args_except] else args - if (length(args_except)) - args_must <- args[!args %in% args_except] - else - args_must <- args - cache_pars <- find_callarg_vals( time = NULL, path = file.path(out_path, "Calls"), @@ -317,6 +312,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, ms2match(mgf_path = mgf_path, aa_masses_all = aa_masses_all, out_path = out_path, + .path_bin = .path_bin, mod_indexes = mod_indexes, type_ms2ions = type_ms2ions, maxn_vmods_per_pep = maxn_vmods_per_pep, @@ -348,8 +344,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, max_len = max_len, max_miss = max_miss, first_search = TRUE, - .savecall = FALSE, - digits = digits) + .savecall = FALSE) file.rename(fi_aa2, fi_aa) file.rename(fi_mi2, fi_mi) @@ -381,8 +376,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, mapply(calib_ms1, fs_mgf, dfs, MoreArgs = list( mgf_path = mgf_path, out_path = out_path, ppm_ms1 = 
ppm_ms1, - min_mass = min_mass, max_mass = max_mass, knots = knots, - digits = digits), + min_mass = min_mass, max_mass = max_mass, knots = knots), SIMPLIFY = FALSE, USE.NAMES = FALSE) } @@ -394,8 +388,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, cl, calib_ms1, fs_mgf, dfs, MoreArgs = list( mgf_path = mgf_path, out_path = out_path, ppm_ms1 = ppm_ms1, - min_mass = min_mass, max_mass = max_mass, knots = knots, - digits = digits), + min_mass = min_mass, max_mass = max_mass, knots = knots), SIMPLIFY = FALSE, USE.NAMES = FALSE) @@ -422,7 +415,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, #' @inheritParams calib_mgf calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL, ppm_ms1 = 20L, min_mass = 200L, max_mass = 4500L, - knots = 50L, digits = 4L) + knots = 50L) { mgfs <- qs::qread(file.path(mgf_path, filename)) @@ -491,9 +484,7 @@ calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL, ## update MGF mgfs <- mgfs %>% dplyr::arrange(ms1_mass) %>% - dplyr::filter(ms1_mass >= min_mass, ms1_mass <= max_mass) %>% - dplyr::mutate( - frame = find_ms1_interval(ms1_mass, from = min_mass, ppm = ppm_ms1_bin)) + dplyr::filter(ms1_mass >= min_mass, ms1_mass <= max_mass) # charges <- get_ms1charges(mgfs[["ms1_charge"]]) # mgfs[["ms1_moverz"]] <- (mgfs[["ms1_mass"]] + 1.00727647 * charges)/charges @@ -504,3 +495,17 @@ calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL, } +#' Finds offsets in precursor masses. 
+#' +#' @inheritParams matchMS +find_ms1_offsets <- function (n_13c = 0L, ms1_notches = 0) +{ + if (length(n_13c)) + n_13c <- n_13c[n_13c != 0L] + + offsets_13c <- if (length(n_13c)) n_13c * 1.00335483 else NULL + ms1_offsets <- unique(c(offsets_13c, ms1_notches)) + round(ms1_offsets, digits = 4L) +} + + diff --git a/R/quant2.R b/R/quant2.R index 661cb41..03b39b3 100644 --- a/R/quant2.R +++ b/R/quant2.R @@ -4,26 +4,23 @@ #' @param idx The i-th chunk #' @inheritParams matchMS hcalc_tmtint <- function (df, quant = "tmt10", ppm_reporters = 10L, idx = 1L, - out_path = NULL, index_mgf_ms2 = FALSE) + out_path = NULL) { df <- df[, c("raw_file", "pep_mod_group", "pep_scan_num", "rptr_moverz", "rptr_int")] df <- unique(df) - - df <- calc_tmtint(df, quant = quant, ppm_reporters = ppm_reporters, - index_mgf_ms2 = index_mgf_ms2) + df <- calc_tmtint(df, quant = quant, ppm_reporters = ppm_reporters) df[["uniq_id"]] <- with(df, paste(raw_file, pep_mod_group, pep_scan_num, sep = ".")) df[["raw_file"]] <- df[["pep_mod_group"]] <- df[["pep_scan_num"]] <- NULL qs::qsave(df, file.path(out_path, paste0("reporters_", idx, ".rds")), preset = "fast") - + invisible(df) } - #' Reporter-ion quantitation. #' #' Not yet used: \code{`134C` = 134.154565}, \code{`135N` = 135.15160} @@ -34,47 +31,51 @@ hcalc_tmtint <- function (df, quant = "tmt10", ppm_reporters = 10L, idx = 1L, #' compatible higher plexes, for example, \code{tmt16} for \code{tmt12} etc. #' and \code{tmt10} for \code{tmt8} etc. #' @param ppm_reporters The mass tolerance of MS2 reporter ions. 
-#' @inheritParams matchMS -calc_tmtint <- function (data = NULL, - quant = c("none", "tmt6", "tmt10", "tmt11", "tmt16"), - ppm_reporters = 10L, index_mgf_ms2 = FALSE) +calc_tmtint <- function (data = NULL, quant = "tmt16", ppm_reporters = 10L) { if (quant == "none") return(data) - nms_tmt6 <- c("126", "127N", "128N", "129N", "130N", "131N") + nms_tmt6 <- c("126", "127N", "128N", "129N", "130N", "131N") - nms_tmt10 <- c("126", "127N", "127C", "128N", "128C", "129N", "129C", - "130N", "130C", "131N") + nms_tmt10 <- c("126", "127N", "127C", "128N", "128C", "129N", "129C", + "130N", "130C", "131N") - nms_tmt11 <- c("126", "127N", "127C", "128N", "128C", "129N", "129C", - "130N", "130C", "131N", "131C") + nms_tmt11 <- c("126", "127N", "127C", "128N", "128C", "129N", "129C", + "130N", "130C", "131N", "131C") nms_tmtpro <- c("126", "127N", "127C", "128N", "128C", "129N", "129C", "130N", "130C", "131N", "131C", "132N", "132C", "133N", "133C", "134N") + nms_tmt18 <- c("126", "127N", "127C", "128N", "128C", "129N", "129C", + "130N", "130C", "131N", "131C", "132N", "132C", + "133N", "133C", "134N", "134C", "135N") + + # "C(8) N(1) H(16)" tmts <- c( `126` = 126.127726, `127N` = 127.124761, `127C` = 127.131080, `128N` = 128.128115, `128C` = 128.134435, `129N` = 129.131470, `129C` = 129.137790, `130N` = 130.134825, `130C` = 130.141145, `131N` = 131.138180, `131C` = 131.144499, `132N` = 132.141535, `132C` = 132.147855, `133N` = 133.14489, `133C` = 133.15121, - `134N` = 134.148245) + `134N` = 134.148245, `134C` = 134.155114, `135N` = 135.152149) theos <- switch(quant, tmt6 = tmts[names(tmts) %in% nms_tmt6], tmt10 = tmts[names(tmts) %in% nms_tmt10], tmt11 = tmts[names(tmts) %in% nms_tmt11], tmt16 = tmts[names(tmts) %in% nms_tmtpro], - stop("Unknown TMt type.", call. 
= FALSE)) + tmt18 = tmts[names(tmts) %in% nms_tmt18], + stop("Unknown TMT type.")) ul <- switch(quant, tmt6 = c(126.1, 131.2), tmt10 = c(126.1, 131.2), tmt11 = c(126.1, 131.2), tmt16 = c(126.1, 134.2), - stop("Unknown TMt type.", call. = FALSE)) + tmt18 = c(126.1, 135.2), + stop("Unknown TMT type.")) # stopifnot(all(c("rptr_moverz", "rptr_int") %in% names(data))) @@ -87,7 +88,7 @@ calc_tmtint <- function (data = NULL, ul = ul, ppm_reporters = ppm_reporters, len = length(theos), - nms = names(theos) + channels = names(theos) ), USE.NAMES = FALSE, SIMPLIFY = FALSE) out <- dplyr::bind_rows(out) @@ -145,7 +146,7 @@ add_rptrs <- function (df = NULL, quant = "none", out_path = NULL) #' @param theos The theoretical m-over-z of reporter ions. #' @param ul The upper and lower bound for reporter-ion m-over-z's. #' @param len The length of reporter-ion plexes. -#' @param nms The names of reporter-ion channels. +#' @param channels The names of reporter-ion channels. #' @inheritParams matchMS #' @examples #' \donttest{ @@ -167,13 +168,13 @@ add_rptrs <- function (df = NULL, quant = "none", out_path = NULL) #' ppm_reporters <- 10 #' ul <- c(126.1, 131.2) #' len <- 10 -#' nms <- names(theos) +#' channels <- names(theos) #' #' x <- mzion:::find_reporter_ints(ms2_moverzs, ms2_ints, theos, ul, ppm_reporters = 10, -#' len , nms) +#' len , channels) #' #' x <- mzion:::find_reporter_ints(ms2_moverzs, ms2_ints, theos, ul, ppm_reporters = 25, -#' len , nms) +#' len , channels) #' #' # Two `129C`, no `127N` etc. 
#' ms2_moverzs <- c(105.1503, 107.0428, 111.7716, 120.0811, 126.1281, 127.1312, @@ -190,36 +191,35 @@ add_rptrs <- function (df = NULL, quant = "none", out_path = NULL) #' 1990.57, 1758.72, 1655.09, 1460.68, 1641.39, 1721.33) #' #' x <- mzion:::find_reporter_ints(ms2_moverzs, ms2_ints, theos, ul, ppm_reporters = 25, -#' len , nms) +#' len , channels) #' } find_reporter_ints <- function (ms2_moverzs, ms2_ints, theos, ul, - ppm_reporters = 10L, len, nms) + ppm_reporters = 10L, len, channels) { range <- findInterval(ul, ms2_moverzs) - - ms <- ms2_moverzs[range[1]:range[2]] - is <- ms2_ints[range[1]:range[2]] - - idxes <- find_reporters_ppm(theos, ms, ppm_reporters, len, nms) - - if (!length(idxes)) - return(rep(NA, len) %>% `names<-`(nms)) - - # 126 127N 127C 128N 128N 128C - # 135569.00 120048.00 122599.00 3397.98 140551.00 144712.00 - - if (anyDuplicated(names(idxes))) { - idxes <- idxes %>% - split(names(.)) %>% - purrr::imap_int(~ { - if (length(.x) > 1L) { - p <- which.min(abs(ms[.x] - theos[.y])) - .x <- .x[p] - } - - .x - }) %>% - .[nms] + rg <- range[1]:range[2] + ms <- ms2_moverzs[rg] + vs <- ms2_ints[rg] + idxes <- find_reporters_ppm(theos, ms, ppm_reporters, len) + + if (!length(idxes)) { + es <- rep_len(NA_real_, len) + names(es) <- channels + return(es) + } + + if (anyDuplicated(chs <- names(idxes))) { + idxes <- split(idxes, chs) + + idxes <- mapply(function (x, y) { + if (length(x) > 1L) + x <- x[which.min(abs(ms[x] - theos[y]))] + else + x + }, idxes, names(idxes), USE.NAMES = FALSE, SIMPLIFY = TRUE) + + # complete and correct order of channels + idxes <- idxes[channels] } # missing channels: @@ -227,14 +227,14 @@ find_reporter_ints <- function (ms2_moverzs, ms2_ints, theos, ul, # 2 NA 3 4 5 6 8 NA NA NA if (anyNA(names(idxes))) - names(idxes) <- nms - - rptr_ints <- is[idxes] %>% - `names<-`(names(idxes)) + names(idxes) <- channels + rptr_ints <- vs[idxes] + names(rptr_ints) <- names(idxes) + if (length(rptr_ints) < len) { - es <- rep(NA, len) - 
names(es) <- nms + es <- rep_len(NA_real_, len) + names(es) <- channels es[names(rptr_ints)] <- rptr_ints } else { @@ -251,7 +251,7 @@ find_reporter_ints <- function (ms2_moverzs, ms2_ints, theos, ul, #' reporter ions). #' @inheritParams find_reporter_ints #' @return A vector of indexes -find_reporters_ppm <- function (theos, expts, ppm_reporters = 10L, len, nms) +find_reporters_ppm <- function (theos, expts, ppm_reporters = 10L, len) { d <- outer(theos, expts, "find_ppm_error") row_cols <- which(abs(d) <= ppm_reporters, arr.ind = TRUE) diff --git a/R/scores.R b/R/scores.R index 7e80525..ed56138 100644 --- a/R/scores.R +++ b/R/scores.R @@ -2,61 +2,54 @@ #' #' @param ms2s A vector of theoretical MS2 m-over-z values. #' @inheritParams matchMS -add_seions <- function (ms2s, type_ms2ions = "by", digits = 4L) +add_seions <- function (ms2s, type_ms2ions = "by") { + # proton <- 1.00727647 + # h2o <- 18.010565 + # nh3 <- 17.026549 + len <- length(ms2s) if (type_ms2ions == "by") { - proton <- 1.00727647 - h2o <- 18.010565 - nh3 <- 17.026549 - bs <- ms2s[1:(len/2)] ys <- ms2s[(len/2+1):len] - b2s <- (bs + proton)/2 - bstars <- bs - nh3 - bstar2s <- (bstars + proton)/2 - b0s <- bs - h2o - b02s <- (b0s + proton)/2 + b2s <- (bs + 1.00727647)/2 + bstars <- bs - 17.026549 + bstar2s <- (bstars + 1.00727647)/2 + b0s <- bs - 18.010565 + b02s <- (b0s + 1.00727647)/2 - y2s <- (ys + proton)/2 - ystars <- ys - nh3 - ystar2s <- (ystars + proton)/2 - y0s <- ys - h2o - y02s <- (y0s + proton)/2 + y2s <- (ys + 1.00727647)/2 + ystars <- ys - 17.026549 + ystar2s <- (ystars + 1.00727647)/2 + y0s <- ys - 18.010565 + y02s <- (y0s + 1.00727647)/2 - round(c(b2s, bstars, bstar2s, b0s, b02s, y2s, ystars, ystar2s, y0s, y02s), - digits = digits) + c(b2s, bstars, bstar2s, b0s, b02s, y2s, ystars, ystar2s, y0s, y02s) } else if (type_ms2ions == "ax") { - proton <- 1.00727647 - h2o <- 18.010565 - nh3 <- 17.026549 - as <- ms2s[1:(len/2)] xs <- ms2s[(len/2+1):len] - a2s <- (as + proton)/2 - astars <- as - nh3 
- astar2s <- (astars + proton)/2 - a0s <- as - h2o - a02s <- (a0s + proton)/2 + a2s <- (as + 1.00727647)/2 + astars <- as - 17.026549 + astar2s <- (astars + 1.00727647)/2 + a0s <- as - 18.010565 + a02s <- (a0s + 1.00727647)/2 - x2s <- (xs + proton)/2 + x2s <- (xs + 1.00727647)/2 - round(c(a2s, astars, astar2s, a0s, a02s, x2s), digits = digits) + c(a2s, astars, astar2s, a0s, a02s, x2s) } else if (type_ms2ions == "cz") { - proton <- 1.00727647 - cs <- ms2s[1:(len/2)] zs <- ms2s[(len/2+1):len] - c2s <- (cs + proton)/2 - z2s <- (zs + proton)/2 + c2s <- (cs + 1.00727647)/2 + z2s <- (zs + 1.00727647)/2 - round(c(c2s, z2s), digits = digits) + c(c2s, z2s) } } @@ -262,18 +255,19 @@ calc_probi_byvmods <- function (df, nms, expt_moverzs, expt_ints, d2 = 1E-5, index_mgf_ms2 = FALSE, tally_ms2ints = TRUE, digits = 4L) { - df_theo <- df$theo + df_theo <- df[["theo"]] m <- length(df_theo) ## df2 - tt2 <- add_seions(df_theo, type_ms2ions = type_ms2ions, digits = digits) + tt2 <- add_seions(df_theo, type_ms2ions = type_ms2ions) df2 <- match_ex2th2(expt_moverzs, tt2, min_ms2mass, d2, index_mgf_ms2) ith2 <- df2[["ith"]] iex2 <- df2[["iex"]] ## 1. int2 (secondary intensities) len <- length(df2[["expt"]]) - df2[["int"]] <- rep(NA_real_, len) + df2[["int"]] <- rep_len(NA_real_, len) + # df2[["int"]] <- rep_len(0, len) df2[["int"]][ith2] <- expt_ints[iex2] # works if iex2 contains NA facs <- rep(seq_len(len/m), each = m) @@ -290,15 +284,15 @@ calc_probi_byvmods <- function (df, nms, expt_moverzs, expt_ints, # int2 # 0 11520 7697 0 0 0 0 0 0 0 0 59843 222989 12091 7927 0 10710 0 - ## 2. 
y ith <- df[["ith"]] iex <- df[["iex"]] - df[["int"]] <- rep(NA_integer_, m) + df[["int"]] <- rep_len(NA_integer_, m) df[["int"]][ith] <- expt_ints[iex] - nudbl <- rep(NA_real_, topn_ms2ions) - nuint <- rep(NA_integer_, topn_ms2ions) + nudbl <- rep_len(NA_real_, topn_ms2ions) + nuint <- rep_len(NA_integer_, topn_ms2ions) + # nuint <- rep_len(0L, topn_ms2ions) y <- list(expt = expt_moverzs, int = expt_ints, theo = nudbl, idx = nuint, int2 = nuint) y[["theo"]][iex] <- df_theo[ith] y[["idx"]][iex] <- ith @@ -307,6 +301,7 @@ calc_probi_byvmods <- function (df, nms, expt_moverzs, expt_ints, ## 3. join `int2` to `y` y_idx <- y[["idx"]] ok_iex <- .Internal(which(!is.na(y_idx))) + # ok_iex <- .Internal(which(y_idx > 0L)) y_ith <- y_idx[ok_iex] y[["int2"]][ok_iex] <- int2[y_ith] @@ -359,13 +354,10 @@ calc_probi_byvmods <- function (df, nms, expt_moverzs, expt_ints, x_ <- x[-burn_ins] k_ <- k[-burn_ins] - if (length(x_)) { - prs <- stats::dhyper(x = x_, m = m, n = N, k = k_) - pr <- min(prs, na.rm = TRUE) - } - else { + if (length(x_)) + pr <- min(stats::dhyper(x = x_, m = m, n = N, k = k_), na.rm = TRUE) + else pr <- .5 - } ## outputs list(pep_ivmod = nms, @@ -926,7 +918,8 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all, cols_sc <- c("pep_seq", "pep_n_ms2", "pep_scan_title", "pep_exp_mz", "pep_exp_mr", "pep_tot_int", "pep_exp_z", "pep_ret_range", - "pep_scan_num", "raw_file", "pep_mod_group", "pep_frame", + "pep_scan_num", "raw_file", "pep_mod_group", "pep_ms1_offset", + # "pep_frame", "pep_fmod", "pep_vmod", "pep_isdecoy", "pep_calc_mr", "pep_ivmod", "pep_prob", "pep_len", "pep_ms2_moverzs", "pep_ms2_ints", @@ -961,7 +954,8 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all, return (dfb) } - df[["uniq_id"]] <- paste(df[["pep_scan_num"]], df[["raw_file"]], sep = "@") + # df[["uniq_id"]] <- paste(df[["pep_scan_num"]], df[["raw_file"]], sep = "@") + df[["uniq_id"]] <- paste(df[["pep_scan_num"]], df[["raw_file"]], df[["pep_ms1_offset"]], 
sep = "@") esscols <- c("pep_ms2_moverzs", "pep_ms2_ints", "matches", "pep_n_ms2", "uniq_id") path_df2 <- file.path(im_path, paste0("df2_", idx, ".rds")) df2 <- df[, -which(names(df) %in% esscols), drop = FALSE] @@ -1038,8 +1032,7 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all, quant = quant, ppm_reporters = ppm_reporters, idx = idx, - out_path = im_path, - index_mgf_ms2 = index_mgf_ms2) + out_path = im_path) qs::qsave(df[, cols_lt, drop = FALSE], file.path(im_path, paste0("list_table_", idx, ".rds")), @@ -1064,7 +1057,8 @@ hadd_primatches <- function (out_path = NULL, # the same as those in calcpepsc cols_sc <- c("pep_seq", "pep_n_ms2", "pep_scan_title", "pep_exp_mz", "pep_exp_mr", "pep_tot_int", "pep_exp_z", "pep_ret_range", "pep_scan_num", "raw_file", - "pep_mod_group", "pep_frame", "pep_fmod", "pep_vmod", "pep_isdecoy", + "pep_mod_group", "pep_ms1_offset", # "pep_frame", + "pep_fmod", "pep_vmod", "pep_isdecoy", "pep_calc_mr", "pep_ivmod", "pep_prob", "pep_len", "pep_ms2_moverzs", "pep_ms2_ints", "pep_ms2_theos", "pep_ms2_theos2", @@ -1664,8 +1658,8 @@ prep_pepfdr_td <- function (td = NULL, out_path, enzyme = "trypsin_p", } cts <- dplyr::count(dplyr::group_by(td, "pep_mod_group"), pep_mod_group) - max_i <- which.max(cts$n)[[1]] - top3s <- which_topx2(cts$n, 3)[1:3] + max_i <- cts$pep_mod_group[which.max(cts$n)[[1]]] + top3s <- cts$pep_mod_group[which_topx2(cts$n, 3)[1:3]] top3s <- top3s[!is.na(top3s)] enzyme <- tolower(enzyme) @@ -1709,8 +1703,9 @@ prep_pepfdr_td <- function (td = NULL, out_path, enzyme = "trypsin_p", } if (!nrow(td)) - stop("Found nothing: empty targets and decoys.") - + stop("No entries at fdr_group = ", fdr_group, ".", + "May consider a different `fdr_group`.") + td } @@ -2663,45 +2658,26 @@ find_ppm_outer_bycombi <- function (X, Y, ppm_ms2 = 20L) match_ex2th2 <- function (expt, theo, min_ms2mass = 115L, d = 1E-5, index_mgf_ms2 = FALSE) { - th <- index_mz(theo, from = min_ms2mass, d = d) - ex <- if (index_mgf_ms2) expt 
else index_mz(expt, from = min_ms2mass, d = d) - ith <- .Internal(which(th %fin% ex | (th - 1L) %fin% ex | (th + 1L) %fin% ex)) - - # if: e.g. th[ith+1] = th[ith] + 1 -> can have NA in iex: - # th[ith+1] not in ex but th[ith+1] - 1 - # OK to keep the NA: - # es initiated as all NA, OK assign NA <- NA during es[ith] <- expt[iex] - # in intensity tally of experimental intensity: - # `%+%` default with na.rm = TRUE - - thi <- th[ith] - iex <- fastmatch::fmatch(thi, ex) + th <- index_mz(theo, from = min_ms2mass, d = d) + ex <- if (index_mgf_ms2) expt else index_mz(expt, from = min_ms2mass, d = d) + t2e <- fastmatch::fmatch(c(th, th - 1L, th + 1L), ex, nomatch = 0L) - # indexes before and after - nas <- .Internal(which(is.na(iex))) + l <- length(th) + mi <- t2e[1:l] + bf <- t2e[(l + 1L):(l + l)] + af <- t2e[(l + l + 1L):(l * 3L)] - if (length(nas)) { - bf <- fastmatch::fmatch(thi - 1L, ex) - - if (all(is.na(bf))) { - af <- fastmatch::fmatch(thi + 1L, ex) - iex[nas] <- af[nas] - } - else { - iex[nas] <- bf[nas] - nas <- .Internal(which(is.na(iex))) - - if (length(nas)) { - af <- fastmatch::fmatch(thi + 1L, ex) - iex[nas] <- af[nas] - } - } - } + okmi <- mi > 0L + okbf <- bf > 0L + okaf <- af > 0L + + ith <- c(.Internal(which(okmi)), .Internal(which(okbf)), .Internal(which(okaf))) + iex <- c(mi[okmi], bf[okbf], af[okaf]) - es <- rep(NA_real_, length(th)) + es <- rep_len(NA_real_, l) names(es) <- names(th) es[ith] <- expt[iex] - + list(theo = theo, expt = es, ith = ith, iex = iex, m = length(iex)) } diff --git a/R/utils_ui.R b/R/utils_ui.R index 34c1a59..d72d2bd 100644 --- a/R/utils_ui.R +++ b/R/utils_ui.R @@ -91,8 +91,7 @@ calc_monopeptide <- function (aa_seq, fixedmods, varmods, maxn_vmods_per_pep = Inf, maxn_sites_per_vmod = Inf, min_mass = 200L, - max_mass = 4500L, - digits = 4L) + max_mass = 4500L) { options(digits = 9L) @@ -110,8 +109,7 @@ calc_monopeptide <- function (aa_seq, fixedmods, varmods, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = 
maxn_sites_per_vmod, min_mass = min_mass, - max_mass = max_mass, - digits = digits) + max_mass = max_mass) }) attrs <- purrr::map(aa_masses_all, attributes) @@ -138,8 +136,7 @@ calc_monopep <- function (aa_seq, aa_masses, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, min_mass = 200L, - max_mass = 4500L, - digits = 4L) + max_mass = 4500L) { if (is.na(aa_seq)) return(NULL) @@ -153,7 +150,7 @@ calc_monopep <- function (aa_seq, aa_masses, `+`(aa_masses["N-term"]) %>% `+`(aa_masses["C-term"]) %>% stats::setNames(aa_seq) %>% - round(digits = digits) + round(digits = 4L) if (type == "amods- tmod- vnl- fnl-") { return(mass) @@ -177,7 +174,7 @@ calc_monopep <- function (aa_seq, aa_masses, if (type %in% c("amods- tmod- vnl- fnl+", "amods- tmod+ vnl- fnl+")) { fnl_combi <- expand_grid_rows0(fmods_nl) deltas <- delta_ms1_a0_fnl1(fnl_combi, aas, aa_masses) - masses <- round(mass - deltas, digits = digits) + masses <- round(mass - deltas, digits = 4L) } else { masses <- mass @@ -207,14 +204,13 @@ calc_monopep <- function (aa_seq, aa_masses, vmods_combi <- unique_mvmods(amods = amods, ntmod = NULL, ctmod = NULL, aa_masses = aa_masses, aas = aas, maxn_vmods_per_pep = maxn_vmods_per_pep, - maxn_sites_per_vmod = maxn_sites_per_vmod, - digits = digits) %>% + maxn_sites_per_vmod = maxn_sites_per_vmod) %>% find_intercombi() deltas <- unname(lapply(vmods_combi, function (x) sum(aa_masses[x]))) masses <- - sapply(deltas, function (x) round(unlist(masses) + x, digits = digits)) + sapply(deltas, function (x) round(unlist(masses) + x, digits = 4L)) } masses @@ -375,7 +371,7 @@ calc_ms2ionseries <- function (aa_seq, fixedmods, varmods, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 32L, maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 3L, digits = 4L) + maxn_vnl_per_seq = 3L) { options(digits = 9L) @@ -398,9 +394,9 @@ calc_ms2ionseries <- function (aa_seq, fixedmods, varmods, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, 
maxn_fnl_per_seq = maxn_fnl_per_seq, - maxn_vnl_per_seq = maxn_vnl_per_seq, digits) + maxn_vnl_per_seq = maxn_vnl_per_seq) - sec <- lapply(pri, add_seions, type_ms2ions = type_ms2ions, digits = digits) + sec <- lapply(pri, add_seions, type_ms2ions = type_ms2ions) list(pri = pri, sec = sec) }, peps, aa_masses_all, SIMPLIFY = FALSE, USE.NAMES = FALSE) @@ -451,8 +447,7 @@ calc_ms2ions <- function (aa_seq, aa_masses, ms1_mass = NULL, mod_indexes = NULL type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, - maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - digits = 4L) + maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L) { # tmt6_mass <- 229.162932 # tmtpro_mass <- 304.207146 @@ -524,8 +519,7 @@ calc_ms2ions <- function (aa_seq, aa_masses, ms1_mass = NULL, mod_indexes = NULL maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, - maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - digits = digits)) + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep)) } @@ -600,8 +594,7 @@ unique_mvmods <- function (amods, ntmod, ctmod, aa_masses, aas, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, .ms1_vmodsets = NULL, - .base_ent = NULL, - digits = 4L) + .base_ent = NULL) { # (6) "amods- tmod- vnl- fnl+" if (!length(amods)) @@ -617,8 +610,7 @@ unique_mvmods <- function (amods, ntmod, ctmod, aa_masses, aas, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, .ms1_vmodsets = .ms1_vmodsets, - .base_ent = .base_ent, - digits = digits) + .base_ent = .base_ent) }) } @@ -657,8 +649,7 @@ vmods_elements <- function (aas, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, .ms1_vmodsets = NULL, - .base_ent = NULL, - digits = 4L) + .base_ent = NULL) { residue <- residue_mods[[1]] diff --git a/data-raw/ms2match_modules.R b/data-raw/ms2match_modules.R index fd3dfe6..bf2558c 100644 --- a/data-raw/ms2match_modules.R +++ 
b/data-raw/ms2match_modules.R @@ -1,3 +1,39 @@ +x = mgf_frames[[1]] +y = theopeps[[1]] +from = 133 +to = 133 +sta = as.integer(names(x[from])) - 1L +end = as.integer(names(x[to])) + 1L + +df <- frames_adv( + x[from:to], y[which(names(y) == sta):which(names(y) == end)], + aa_masses = aa_masses, + ms1vmods = ms1vmods, + ms2vmods = ms2vmods, + ntmod = ntmod, + ctmod = ctmod, + ntmass = ntmass, + ctmass = ctmass, + amods = amods, + vmods_nl = vmods_nl, + fmods_nl = fmods_nl, + pep_mod_group = pep_mod_group, + mod_indexes = mod_indexes, + type_ms2ions = type_ms2ions, + maxn_vmods_per_pep = maxn_vmods_per_pep, + maxn_sites_per_vmod = maxn_sites_per_vmod, + maxn_fnl_per_seq = maxn_fnl_per_seq, + maxn_vnl_per_seq = maxn_vnl_per_seq, + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, + minn_ms2 = minn_ms2, + ppm_ms1 = ppm_ms1, + ppm_ms2 = ppm_ms2, + min_ms2mass = min_ms2mass, + index_mgf_ms2 = index_mgf_ms2, + ms1_offsets = ms1_offsets, + FUN = FUN +) + ## Module 9 # ms2match_a1_vnl1_fnl0 (Kh_001) i=100 diff --git a/man/add_ms1_13c.Rd b/man/add_ms1_13c.Rd index 3feaf5e..3cf8f13 100644 --- a/man/add_ms1_13c.Rd +++ b/man/add_ms1_13c.Rd @@ -4,23 +4,18 @@ \alias{add_ms1_13c} \title{Adds Carbon-13 masses.} \usage{ -add_ms1_13c(peps, n_13c = 1L, max_mass = 4500L) +add_ms1_13c(peps, n_13c = 0L, max_mass = 4500L) } \arguments{ \item{peps}{A named vector of peptide sequences. Sequences in names and masses in values.} -\item{n_13c}{A non-negative integer; the maximum number of 13C off-sets for -consideration in MS1 masses. The default is 0 with no off-sets. -Peak-pickings by various MGF conversion tools may have attempted to adjust -precursor masses to the corresponding mono-isotopic masses in isotope -envelopes. Nevertheless, by setting \code{n_13c = 1}, some increases in the -number of PSMs may be readily achieved at a relatively small cost of search -time.} +\item{n_13c}{Number(s) of 13C off-sets in precursor masses, for example, over +the range of \code{-1:2}. 
The default is 0.} \item{max_mass}{A positive integer; the maximum precursor mass for interrogation.} } \description{ -Adds Carbon-13 masses. +Not currently used. } diff --git a/man/add_ms1_notches.Rd b/man/add_ms1_notches.Rd new file mode 100644 index 0000000..31ea581 --- /dev/null +++ b/man/add_ms1_notches.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ms1_precursors.R +\name{add_ms1_notches} +\alias{add_ms1_notches} +\title{Adds offsets of MS1 masses.} +\usage{ +add_ms1_notches(peps, masses = 0, max_mass = 4500L) +} +\arguments{ +\item{peps}{A named vector of peptide sequences. Sequences in names and +masses in values.} + +\item{masses}{The masses of notches.} + +\item{max_mass}{A positive integer; the maximum precursor mass for +interrogation.} +} +\description{ +Not currently used. +} diff --git a/man/add_seions.Rd b/man/add_seions.Rd index 03939ed..81325e8 100644 --- a/man/add_seions.Rd +++ b/man/add_seions.Rd @@ -4,7 +4,7 @@ \alias{add_seions} \title{Adds secondary ions of b0, y0 etc.} \usage{ -add_seions(ms2s, type_ms2ions = "by", digits = 4L) +add_seions(ms2s, type_ms2ions = "by") } \arguments{ \item{ms2s}{A vector of theoretical MS2 m-over-z values.} @@ -13,9 +13,6 @@ add_seions(ms2s, type_ms2ions = "by", digits = 4L) \href{http://www.matrixscience.com/help/fragmentation_help.html}{ MS2 ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- and y-ions.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ Adds secondary ions of b0, y0 etc. 
diff --git a/man/binTheoSeqs.Rd b/man/binTheoSeqs.Rd index 86e71fa..a9649eb 100644 --- a/man/binTheoSeqs.Rd +++ b/man/binTheoSeqs.Rd @@ -10,6 +10,7 @@ binTheoSeqs( min_mass = 200L, max_mass = 4500L, ppm_ms1 = 10L, + enzyme = "trypsin_p", out_path = NULL ) } @@ -26,6 +27,8 @@ binTheoSeqs( \item{ppm_ms1}{Numeric; (half of) the error tolerance of MS1 mass in ppm.} +\item{enzyme}{The assumed enzyme activity.} + \item{out_path}{The output path.} } \value{ diff --git a/man/bin_ms1masses.Rd b/man/bin_ms1masses.Rd index 482f01d..42531e2 100644 --- a/man/bin_ms1masses.Rd +++ b/man/bin_ms1masses.Rd @@ -8,12 +8,15 @@ bin_ms1masses( res = NULL, min_mass = 200L, max_mass = 4500L, + min_len = 7L, + max_len = 40L, ppm_ms1 = 20L, use_ms1_cache = TRUE, .path_cache = NULL, .path_ms1masses = NULL, is_ms1_three_frame = TRUE, out_path = NULL, + enzyme = "trypsin_p", sys_ram = 24L ) } \arguments{ @@ -24,6 +27,12 @@ bin_ms1masses( \item{max_mass}{A maximum mass of precursors.} +\item{min_len}{A positive integer; the minimum length of peptide sequences +for considerations. Shorter peptides will be excluded. The default is 7.} + +\item{max_len}{A positive integer; the maximum length of peptide sequences +for considerations. Longer peptides will be excluded. The default is 40.} + \item{ppm_ms1}{A positive integer; the mass tolerance of MS1 species. The default is 20.} @@ -47,6 +56,10 @@ preceding, current and following.} \item{out_path}{A file path of outputs.} +\item{enzyme}{A character string; the proteolytic specificity of the assumed +enzyme will be used to generate peptide sequences from protein entries. The +default is \code{Trypsin_P}.
See also parameter \code{custom_enzyme}.} + \item{sys_ram}{A putative value of system RAM.} } \description{ diff --git a/man/bions_base.Rd b/man/bions_base.Rd index a8747c5..bd3524f 100644 --- a/man/bions_base.Rd +++ b/man/bions_base.Rd @@ -6,83 +6,53 @@ \alias{axions} \alias{bions_base} \alias{yions_base} -\alias{b2ions_base} -\alias{bstarions} -\alias{bstar2ions} -\alias{b0ions} -\alias{b02ions} -\alias{y2ions} -\alias{ystarions} -\alias{ystar2ions} -\alias{y0ions} -\alias{y02ions} \alias{cions_base} -\alias{c2ions} \alias{zions_base} +\alias{c2ions} \alias{z2ions} \alias{aions_base} +\alias{xions_base} \alias{a2ions} \alias{astarions} \alias{astar2ions} \alias{a0ions} \alias{a02ions} -\alias{xions_base} \alias{x2ions} \title{Masses of singly-charged b- and y-ions.} \usage{ -byions(ntmass, ctmass, aam, digits = 4L) - -czions(ntmass, ctmass, aam, digits = 4L) - -axions(ntmass, ctmass, aam, digits = 4L) - -bions_base(aam, tmass, digits = 4L) - -yions_base(aam, tmass, digits = 4L) - -b2ions_base(aam, tmass, digits = 4L, n = 2L) - -bstarions(aam, tmass, digits = 4L) - -bstar2ions(aam, tmass, digits = 4L, n = 2L) - -b0ions(aam, tmass, digits = 4L) +byions(ntmass, ctmass, aam) -b02ions(aam, tmass, digits = 4L, n = 2L) +czions(ntmass, ctmass, aam) -y2ions(aam, tmass, digits = 4L, n = 2L) +axions(ntmass, ctmass, aam) -ystarions(aam, tmass, digits = 4L) +bions_base(aam, ntmass) -ystar2ions(aam, tmass, digits = 4L, n = 2L) +yions_base(aam, ctmass) -y0ions(aam, tmass, digits = 4L) +cions_base(aam, ntmass) -y02ions(aam, tmass, digits = 4L, n = 2L) +zions_base(aam, ctmass) -cions_base(aam, tmass, digits = 4L) +c2ions(aam, ntmass, n = 2L) -c2ions(aam, tmass, digits = 4L, n = 2L) +z2ions(aam, ctmass, n = 2L) -zions_base(aam, tmass, digits = 4L) +aions_base(aam, ntmass) -z2ions(aam, tmass, digits = 4L, n = 2L) +xions_base(aam, ctmass) -aions_base(aam, tmass, digits = 4L) +a2ions(aam, ntmass, n = 2L) -a2ions(aam, tmass, digits = 4L, n = 2L) +astarions(aam, ntmass) -astarions(aam, 
tmass, digits = 4L) +astar2ions(aam, ntmass, n = 2L) -astar2ions(aam, tmass, digits = 4L, n = 2L) +a0ions(aam, ntmass) -a0ions(aam, tmass, digits = 4L) +a02ions(aam, ntmass, n = 2L) -a02ions(aam, tmass, digits = 4L, n = 2L) - -xions_base(aam, tmass, digits = 4L) - -x2ions(aam, tmass, digits = 4L, n = 2L) +x2ions(aam, ctmass, n = 2L) } \arguments{ \item{ntmass}{The mass of a fixed or variable N-term modification.} @@ -95,16 +65,25 @@ x2ions(aam, tmass, digits = 4L, n = 2L) The masses reflects fixed/variable modifications, and/or fixed/variable neutral losses.} -\item{digits}{Integer; the number of decimal places to be used.} - -\item{tmass}{The mass of a fixed or variable N-term or C-term modification.} - \item{n}{The charge state.} } \description{ +b-ions first, then y-ions + For (1) "amods- tmod- vnl- fnl-", (2) "amods- tmod+ vnl- fnl-". -\code{H2O = 18.010565}. +# (1) OH (C-term), + H (neutralizes the N-term on a fragment) + H+ +# (2) Other C-term (other than OH) + H + H+: X + 1.007825 + 1.00727647 + +\code{NH3 = 17.026549} + +\code{CO = 27.9949146} + +\code{+CO -H2 = 27.9949146 - 2 * 1.007825} + +\code{-CO -NH3 = -(27.9949146 + 17.026549)} + +\code{-CO -H2O = -(27.9949146 + 18.010565)} } \examples{ \donttest{ @@ -206,6 +185,5 @@ if (!length(ctmod)) { b <- mzion:::bions_base(aam, ntmass) y <- mzion:::yions_base(aam, ctmass) - } } diff --git a/man/calc_aamasses.Rd b/man/calc_aamasses.Rd index b5f1100..de5ddcb 100644 --- a/man/calc_aamasses.Rd +++ b/man/calc_aamasses.Rd @@ -8,6 +8,7 @@ calc_aamasses( fixedmods = c("TMT6plex (K)", "Carbamidomethyl (. 
= C)"), varmods = c("TMT6plex (N-term)", "Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + rm_dup_term_anywhere = TRUE, aa_masses = NULL, varlabs = NULL, mod_motifs = NULL, @@ -21,6 +22,10 @@ calc_aamasses( \item{varmods}{A character vector of variable modifications.} +\item{rm_dup_term_anywhere}{Logical; if TRUE, removes combinations in +variable modifications with site(s) in positions of both terminal and +anywhere, e.g., "Gln->pyro-Glu (N-term = Q)" and "Deamidated (Q)".} + \item{aa_masses}{An amino-acid mass lookup.} \item{varlabs}{Character string(s) of variable isotopic labels. See examples diff --git a/man/calc_monopep.Rd b/man/calc_monopep.Rd index 14a5e7b..6732f87 100644 --- a/man/calc_monopep.Rd +++ b/man/calc_monopep.Rd @@ -10,8 +10,7 @@ calc_monopep( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, min_mass = 200L, - max_mass = 4500L, - digits = 4L + max_mass = 4500L ) } \arguments{ @@ -33,8 +32,6 @@ against low molecular-weight precursors is \code{min_len}).} \item{max_mass}{A positive integer; the maximum precursor mass for interrogation.} - -\item{digits}{Integer; the number of decimal places to be used.} } \description{ Only used for calc_monopeptide at a user's interface. Typically coupled to diff --git a/man/calc_monopeptide.Rd b/man/calc_monopeptide.Rd index 7cadcc3..afe673c 100644 --- a/man/calc_monopeptide.Rd +++ b/man/calc_monopeptide.Rd @@ -12,8 +12,7 @@ calc_monopeptide( maxn_vmods_per_pep = Inf, maxn_sites_per_vmod = Inf, min_mass = 200L, - max_mass = 4500L, - digits = 4L + max_mass = 4500L ) } \arguments{ @@ -40,8 +39,6 @@ against low molecular-weight precursors is \code{min_len}).} \item{max_mass}{A positive integer; the maximum precursor mass for interrogation.} - -\item{digits}{Integer; the number of decimal places to be used.} } \description{ Only for direct uses from an R console (with trade-offs in speed).
diff --git a/man/calc_ms2ions.Rd b/man/calc_ms2ions.Rd index bb1f650..6a35775 100644 --- a/man/calc_ms2ions.Rd +++ b/man/calc_ms2ions.Rd @@ -14,8 +14,7 @@ calc_ms2ions( maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 3L, - digits = 4L + maxn_vnl_per_seq = 3L ) } \arguments{ @@ -53,8 +52,6 @@ or \code{0}.} permutative neutral losses per peptide sequence for variable modifications. To bypass the combinatorial of neutral losses, set \code{maxn_vnl_per_seq = 1} or \code{0}.} - -\item{digits}{Integer; the number of decimal places to be used.} } \description{ For a given type of fragmentation. Minimal error handling for speeds. diff --git a/man/calc_ms2ions_a1_vnl0_fnl0.Rd b/man/calc_ms2ions_a1_vnl0_fnl0.Rd index 2e43f7c..9658a46 100644 --- a/man/calc_ms2ions_a1_vnl0_fnl0.Rd +++ b/man/calc_ms2ions_a1_vnl0_fnl0.Rd @@ -12,8 +12,7 @@ calc_ms2ions_a1_vnl0_fnl0( ntmass, ctmass, type_ms2ions = "by", - mod_indexes, - digits = 4L + mod_indexes ) } \arguments{ @@ -38,9 +37,6 @@ ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- and y-ions.} \item{mod_indexes}{Modification indexes.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ Helper for the calculation of MS2 ion series. diff --git a/man/calc_ms2ions_a1_vnl0_fnl1.Rd b/man/calc_ms2ions_a1_vnl0_fnl1.Rd index ed5ff3a..e25356a 100644 --- a/man/calc_ms2ions_a1_vnl0_fnl1.Rd +++ b/man/calc_ms2ions_a1_vnl0_fnl1.Rd @@ -14,8 +14,7 @@ calc_ms2ions_a1_vnl0_fnl1( ntmass, ctmass, type_ms2ions = "by", - mod_indexes, - digits = 4L + mod_indexes ) } \arguments{ @@ -47,9 +46,6 @@ ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- and y-ions.} \item{mod_indexes}{Modification indexes.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. 
The default is 4.} } \description{ Calculates diff --git a/man/calc_ms2ions_a1_vnl1_fnl0.Rd b/man/calc_ms2ions_a1_vnl1_fnl0.Rd index ec152f5..ae55b17 100644 --- a/man/calc_ms2ions_a1_vnl1_fnl0.Rd +++ b/man/calc_ms2ions_a1_vnl1_fnl0.Rd @@ -13,8 +13,7 @@ calc_ms2ions_a1_vnl1_fnl0( ntmass, ctmass, type_ms2ions = "by", - mod_indexes, - digits = 4L + mod_indexes ) } \arguments{ @@ -43,9 +42,6 @@ ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- and y-ions.} \item{mod_indexes}{Modification indexes.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ Calculates MS2 ions. diff --git a/man/calc_ms2ionseries.Rd b/man/calc_ms2ionseries.Rd index f8bd962..fc19f20 100644 --- a/man/calc_ms2ionseries.Rd +++ b/man/calc_ms2ionseries.Rd @@ -15,8 +15,7 @@ calc_ms2ionseries( maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 32L, maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 3L, - digits = 4L + maxn_vnl_per_seq = 3L ) } \arguments{ @@ -56,8 +55,6 @@ or \code{0}.} permutative neutral losses per peptide sequence for variable modifications. To bypass the combinatorial of neutral losses, set \code{maxn_vnl_per_seq = 1} or \code{0}.} - -\item{digits}{Integer; the number of decimal places to be used.} } \description{ For direct uses from an R console (with trade-offs in speed). 
diff --git a/man/calc_pepmasses2.Rd b/man/calc_pepmasses2.Rd index d7a029b..285bf2d 100644 --- a/man/calc_pepmasses2.Rd +++ b/man/calc_pepmasses2.Rd @@ -13,6 +13,7 @@ calc_pepmasses2( fixedmods = c("TMT6plex (N-term)", "TMT6plex (K)", "Carbamidomethyl (C)"), varmods = c("Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + rm_dup_term_anywhere = TRUE, fixedlabs = NULL, varlabs = NULL, mod_motifs = NULL, @@ -28,7 +29,6 @@ calc_pepmasses2( max_miss = 2L, min_mass = 200L, max_mass = 4500L, - n_13c = 0L, out_path = NULL, digits = 4L, use_ms1_cache = TRUE, @@ -59,6 +59,10 @@ c("uniprot_acc", "uniprot_id", "refseq_acc", "other"). See also \item{varmods}{A character vector of variable modifications.} +\item{rm_dup_term_anywhere}{Logical; if TRUE, removes combinations in +variable modifications with site(s) in positions of both terminal and +anywhere, e.g., "Gln->pyro-Glu (N-term = Q)" and "Deamidated (Q)".} + \item{fixedlabs}{Character string(s) of fixed isotopic labels. See examples of SILAC for details. Can be but not typically used in standard alone searches of labeled residues.} @@ -133,14 +137,6 @@ against low molecular-weight precursors is \code{min_len}).} \item{max_mass}{A positive integer; the maximum precursor mass for interrogation.} -\item{n_13c}{A non-negative integer; the maximum number of 13C off-sets for -consideration in MS1 masses. The default is 0 with no off-sets. -Peak-pickings by various MGF conversion tools may have attempted to adjust -precursor masses to the corresponding mono-isotopic masses in isotope -envelopes.
Nevertheless, by setting \code{n_13c = 1}, some increases in the -number of PSMs may be readily achieved at a relatively small cost of search -time.} - \item{out_path}{An output path.} \item{digits}{Integer; the number of decimal places to be used.} diff --git a/man/calc_pepprobs_i.Rd b/man/calc_pepprobs_i.Rd index 8df03a9..ca20e81 100644 --- a/man/calc_pepprobs_i.Rd +++ b/man/calc_pepprobs_i.Rd @@ -32,11 +32,11 @@ and y-ions.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE. For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{out_path}{A file path of outputs.} diff --git a/man/calc_pepscores.Rd b/man/calc_pepscores.Rd index 1af2778..2d0970c 100644 --- a/man/calc_pepscores.Rd +++ b/man/calc_pepscores.Rd @@ -62,11 +62,11 @@ for considerations. Longer peptides will be excluded. The default is 40.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE.
For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{out_path}{A file path of outputs.} @@ -196,10 +196,10 @@ sets of combinatorial variable modifications. The default is 512.} \item{add_ms2ints}{Logical; if TRUE, adds the sequence of experimental MS2 intensity values (\code{pep_ms2_ints}).} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The latter would probably need more than +32G RAM if the number of modules is over 96.} \item{digits}{A non-negative integer; the number of decimal places to be used. The default is 4.} diff --git a/man/calc_probi.Rd b/man/calc_probi.Rd index d7ec1e8..c9835d4 100644 --- a/man/calc_probi.Rd +++ b/man/calc_probi.Rd @@ -40,11 +40,11 @@ searches.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE.
For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{min_ms2mass}{A positive integer; the minimum MS2 mass for interrogation. The default is 110.} diff --git a/man/calc_probi_bypep.Rd b/man/calc_probi_bypep.Rd index c0ed2ea..d637bfa 100644 --- a/man/calc_probi_bypep.Rd +++ b/man/calc_probi_bypep.Rd @@ -43,11 +43,11 @@ searches.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE. For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{min_ms2mass}{A positive integer; the minimum MS2 mass for interrogation. The default is 110.} diff --git a/man/calc_probi_byvmods.Rd b/man/calc_probi_byvmods.Rd index 96044f7..e97d10a 100644 --- a/man/calc_probi_byvmods.Rd +++ b/man/calc_probi_byvmods.Rd @@ -45,11 +45,11 @@ searches.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed.
Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE. For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{burn_ins}{The range of burn-ins where inputs will be excluded from probablity assessments.} diff --git a/man/calc_protfdr.Rd b/man/calc_protfdr.Rd index 95ebd51..7020efe 100644 --- a/man/calc_protfdr.Rd +++ b/man/calc_protfdr.Rd @@ -31,13 +31,13 @@ protein significance score cutoff of zero and thus are significant. Choose Note that the the value of \code{prot_n_pep} includes the counts of shared peptides.} -\item{method_prot_es_co}{A character string; the method to calculate the -cut-offs of protein enrichment scores. The value is in one of \code{ -"median", "mean", "max", "min"} with the default of \code{"median"}. For -instance at the default, the median of \code{peptide_score - -pep_score_cutoff} under a protein will be used to represent the threshold -of a protein enrichment score. For more conserved thresholds, the -statistics of \code{"max"} may be considered.} +\item{method_prot_es_co}{A low-priority setting. A character string; the +method to calculate the cut-offs of protein enrichment scores. The value is +in one of \code{"median", "mean", "max", "min"} with the default of +\code{"median"}. For instance at the default, the median of +\code{peptide_score - pep_score_cutoff} under a protein will be used to +represent the threshold of a protein enrichment score.
For more conserved +thresholds, the statistics of \code{"max"} may be considered.} \item{out_path}{An output path.} } diff --git a/man/calc_protfdr_i.Rd b/man/calc_protfdr_i.Rd index 309630c..735e264 100644 --- a/man/calc_protfdr_i.Rd +++ b/man/calc_protfdr_i.Rd @@ -26,13 +26,13 @@ protein significance score cutoff of zero and thus are significant. Choose Note that the the value of \code{prot_n_pep} includes the counts of shared peptides.} -\item{method_prot_es_co}{A character string; the method to calculate the -cut-offs of protein enrichment scores. The value is in one of \code{ -"median", "mean", "max", "min"} with the default of \code{"median"}. For -instance at the default, the median of \code{peptide_score - -pep_score_cutoff} under a protein will be used to represent the threshold -of a protein enrichment score. For more conserved thresholds, the -statistics of \code{"max"} may be considered.} +\item{method_prot_es_co}{A low-priority setting. A character string; the +method to calculate the cut-offs of protein enrichment scores. The value is +in one of \code{"median", "mean", "max", "min"} with the default of +\code{"median"}. For instance at the default, the median of +\code{peptide_score - pep_score_cutoff} under a protein will be used to +represent the threshold of a protein enrichment score. 
For more conserved +thresholds, the statistics of \code{"max"} may be considered.} \item{n_burnin}{The minimum number of burn-ins in protein enrichment scores.} diff --git a/man/calc_tmtint.Rd b/man/calc_tmtint.Rd index da733f2..6e54aac 100644 --- a/man/calc_tmtint.Rd +++ b/man/calc_tmtint.Rd @@ -4,12 +4,7 @@ \alias{calc_tmtint} \title{Reporter-ion quantitation.} \usage{ -calc_tmtint( - data = NULL, - quant = c("none", "tmt6", "tmt10", "tmt11", "tmt16"), - ppm_reporters = 10L, - index_mgf_ms2 = FALSE -) +calc_tmtint(data = NULL, quant = "tmt16", ppm_reporters = 10L) } \arguments{ \item{data}{An upstream result from \link{matchMS}.} @@ -20,20 +15,6 @@ compatible higher plexes, for example, \code{tmt16} for \code{tmt12} etc. and \code{tmt10} for \code{tmt8} etc.} \item{ppm_reporters}{The mass tolerance of MS2 reporter ions.} - -\item{index_mgf_ms2}{A low-priority feature. Logical; if TRUE, converts - upfrontly MS2 m-over-z values from numeric to integers as opposed to - \emph{in-situ} conversion during ion matches. The default is FALSE. The - \code{index_mgf_ms2 = TRUE} might be useful for very large MS files by - reducing RAM footprints. - - At \code{index_mgf_ms2 = TRUE}, the resolution of mass deltas between - theoretical and experimental MS2 m-over-z values is limited by the - \code{bin_width}, which is the ceiling half of the \code{ppm_ms2}. For - instance, the \code{bin_width} is 10 ppm at the default \code{ppm_ms2 = - 20}. Due to the low resolution in mass deltas at \code{index_mgf_ms2 = TRUE}, - the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, - pep_ms2_deltas_sd} are nullified in the outputs.} } \description{ Not yet used: \code{`134C` = 134.154565}, \code{`135N` = 135.15160} diff --git a/man/calcpepsc.Rd b/man/calcpepsc.Rd index 6549e4a..ef832cd 100644 --- a/man/calcpepsc.Rd +++ b/man/calcpepsc.Rd @@ -49,11 +49,11 @@ and y-ions.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. 
The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE. For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{out_path}{A file path of outputs.} @@ -112,10 +112,10 @@ plexes. For example, apply \code{tmt16} for \code{tmt12} provided a set of \item{ppm_reporters}{A positive integer; the mass tolerance of MS2 reporter ions. The default is 10.} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The latter would probably need more than +32G RAM if the number of modules is over 96.} \item{digits}{A non-negative integer; the number of decimal places to be used. 
The default is 4.} diff --git a/man/calib_mgf.Rd b/man/calib_mgf.Rd index d6209a6..c7b5e90 100644 --- a/man/calib_mgf.Rd +++ b/man/calib_mgf.Rd @@ -8,6 +8,7 @@ calib_mgf( mgf_path = NULL, aa_masses_all = NULL, out_path = NULL, + .path_bin, mod_indexes = NULL, type_ms2ions = "by", maxn_vmods_per_pep = 5L, @@ -38,8 +39,7 @@ calib_mgf( min_len = 7L, max_len = 40L, max_miss = 2L, - knots = 50L, - digits = 4L + knots = 50L ) } \arguments{ @@ -57,8 +57,10 @@ calib_mgf( \item{out_path}{A file path of outputs.} +\item{.path_bin}{The file path to binned precursor masses.} + \item{mod_indexes}{Integer; the indexes of fixed and/or variable -modifications} +modifications.} \item{type_ms2ions}{Character; the type of \href{http://www.matrixscience.com/help/fragmentation_help.html}{ MS2 @@ -142,10 +144,10 @@ ions. The default is 10.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The latter would probably need more than +32G RAM if the number of modules is over 96.} \item{fasta}{Character string(s) to the name(s) of fasta file(s) with prepended directory path. The experimenter needs to supply the files.} @@ -197,9 +199,6 @@ for considerations. Longer peptides will be excluded. The default is 40.} per peptide sequence for considerations. The default is 2.} \item{knots}{The number of knots for spline fits.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. 
The default is 4.} } \description{ \code{ppm_ms1} only for the calculation of frame indexes of precursors. diff --git a/man/calib_ms1.Rd b/man/calib_ms1.Rd index d36bae1..c8ddea7 100644 --- a/man/calib_ms1.Rd +++ b/man/calib_ms1.Rd @@ -12,8 +12,7 @@ calib_ms1( ppm_ms1 = 20L, min_mass = 200L, max_mass = 4500L, - knots = 50L, - digits = 4L + knots = 50L ) } \arguments{ @@ -44,9 +43,6 @@ against low molecular-weight precursors is \code{min_len}).} interrogation.} \item{knots}{The number of knots for spline fits.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ Calibrates precursor masses (by individual RAW_Files) diff --git a/man/find_aa_masses.Rd b/man/find_aa_masses.Rd index 1a0d68d..4756101 100644 --- a/man/find_aa_masses.Rd +++ b/man/find_aa_masses.Rd @@ -9,6 +9,7 @@ find_aa_masses( out_path = NULL, fixedmods = NULL, varmods = NULL, + rm_dup_term_anywhere = TRUE, varlabs = NULL, mod_motifs = NULL, maxn_vmods_setscombi = 64L @@ -24,6 +25,10 @@ find_aa_masses( \item{varmods}{A character vector of variable modifications.} +\item{rm_dup_term_anywhere}{Logical; if TRUE, removes combinations in +variable modifications with site(s) in positions of both terminal and +anywhere, e.g., "Gln->pyro-Glu (N-term = Q)" and "Deamidated (Q).} + \item{varlabs}{Character string(s) of variable isotopic labels. See examples of SILAC for details. 
Can be but not typically used in standard alone searches of labeled residues.} diff --git a/man/find_ms1_interval.Rd b/man/find_ms1_interval.Rd index b7a10a7..a107355 100644 --- a/man/find_ms1_interval.Rd +++ b/man/find_ms1_interval.Rd @@ -5,7 +5,7 @@ \title{Calculates the frame numbers for a list of experimental MS1 mass by intervals.} \usage{ -find_ms1_interval(mass = 1800, from = 115L, ppm = 10L) +find_ms1_interval(mass = 1800, from = 200L, ppm = 10L) } \arguments{ \item{mass}{Numeric; a list of MS1 masses.} diff --git a/man/find_ms1_offsets.Rd b/man/find_ms1_offsets.Rd new file mode 100644 index 0000000..295967b --- /dev/null +++ b/man/find_ms1_offsets.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/msmsmatches2.R +\name{find_ms1_offsets} +\alias{find_ms1_offsets} +\title{Finds offsets in precursor masses.} +\usage{ +find_ms1_offsets(n_13c = 0L, ms1_notches = 0) +} +\arguments{ +\item{n_13c}{Number(s) of 13C off-sets in precursor masses, for example, over +the range of \code{-1:2}. The default is 0.} + +\item{ms1_notches}{A numeric vector; notches (off-sets) in precursor masses, +e.g., \code{c(-79.966331, -97.976896)} to account for the loss of a phospho +group and phosphoric acid in precursor masses.} +} +\description{ +Finds offsets in precursor masses. +} diff --git a/man/find_ms2_bypep.Rd b/man/find_ms2_bypep.Rd index 526fa24..46b4d6e 100644 --- a/man/find_ms2_bypep.Rd +++ b/man/find_ms2_bypep.Rd @@ -251,7 +251,6 @@ d <- ppm_ms2/1E6 ex <- ceiling(log(expts/min_ms2mass)/log(1+d)) x5 <- mzion:::find_ms2_bypep(theos, expts, ex, d, ppm_ms2) - } } diff --git a/man/find_reporter_ints.Rd b/man/find_reporter_ints.Rd index 3127763..9d5d424 100644 --- a/man/find_reporter_ints.Rd +++ b/man/find_reporter_ints.Rd @@ -11,7 +11,7 @@ find_reporter_ints( ul, ppm_reporters = 10L, len, - nms + channels ) } \arguments{ @@ -30,7 +30,7 @@ ions. 
The default is 10.} \item{len}{The length of reporter-ion plexes.} -\item{nms}{The names of reporter-ion channels.} +\item{channels}{The names of reporter-ion channels.} } \description{ Finds the intensities of reporter-ions. @@ -55,13 +55,13 @@ names(theos) <- c("126", "127N", "127C", "128N", "128C", ppm_reporters <- 10 ul <- c(126.1, 131.2) len <- 10 -nms <- names(theos) +channels <- names(theos) x <- mzion:::find_reporter_ints(ms2_moverzs, ms2_ints, theos, ul, ppm_reporters = 10, - len , nms) + len , channels) x <- mzion:::find_reporter_ints(ms2_moverzs, ms2_ints, theos, ul, ppm_reporters = 25, - len , nms) + len , channels) # Two `129C`, no `127N` etc. ms2_moverzs <- c(105.1503, 107.0428, 111.7716, 120.0811, 126.1281, 127.1312, @@ -78,6 +78,6 @@ ms2_ints <- c(1201.79, 1319.32, 1603.45, 1595.34, 2148.66, 1785.74, 1254.24, 1990.57, 1758.72, 1655.09, 1460.68, 1641.39, 1721.33) x <- mzion:::find_reporter_ints(ms2_moverzs, ms2_ints, theos, ul, ppm_reporters = 25, - len , nms) + len , channels) } } diff --git a/man/find_reporters_ppm.Rd b/man/find_reporters_ppm.Rd index bb524de..de7956f 100644 --- a/man/find_reporters_ppm.Rd +++ b/man/find_reporters_ppm.Rd @@ -4,7 +4,7 @@ \alias{find_reporters_ppm} \title{Finds the indexes of reporter ions.} \usage{ -find_reporters_ppm(theos, expts, ppm_reporters = 10L, len, nms) +find_reporters_ppm(theos, expts, ppm_reporters = 10L, len) } \arguments{ \item{theos}{The theoretical m-over-z of reporter ions.} @@ -16,8 +16,6 @@ reporter ions).} ions. 
The default is 10.} \item{len}{The length of reporter-ion plexes.} - -\item{nms}{The names of reporter-ion channels.} } \value{ A vector of indexes diff --git a/man/frames_adv.Rd b/man/frames_adv.Rd index 0286ca8..18e9716 100644 --- a/man/frames_adv.Rd +++ b/man/frames_adv.Rd @@ -30,7 +30,6 @@ frames_adv( ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - digits = 4L, FUN ) } @@ -43,13 +42,13 @@ values in \code{frame} for progressive searches.} \item{theopeps}{Binned theoretical peptides corresponding to an i-th \code{aa_masses}.} -\item{aa_masses}{An amino-acid look-up} +\item{aa_masses}{An amino-acid look-up.} \item{ms1vmods}{All possible labels of MS1 variable modifications with -an i-th \code{aa_masses}} +an i-th \code{aa_masses}.} \item{ms2vmods}{All possible labels of MS2 variable modifications with -an i-t \code{aa_masses}} +an i-th \code{aa_masses}.} \item{ntmod}{The attribute \code{ntmod} from a \code{aa_masses}.} @@ -65,7 +64,7 @@ an i-t \code{aa_masses}} \item{fmods_nl}{The attribute of \code{fmods_nl} from an \code{aa_masses}.} -\item{pep_mod_group}{The index of peptide modification groups} +\item{pep_mod_group}{The index of peptide modification groups.} \item{mod_indexes}{Integer; the indexes of fixed and/or variable modifications.} @@ -128,9 +127,6 @@ interrogation. The default is 110.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} -\item{digits}{A non-negative integer; the number of decimal places to be -used. 
The default is 4.} - \item{FUN}{A function pointer to, e.g., \link{gen_ms2ions_base}.} } \value{ diff --git a/man/fuzzy_match_one.Rd b/man/fuzzy_match_one.Rd deleted file mode 100644 index 1378286..0000000 --- a/man/fuzzy_match_one.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ms2frames.R -\name{fuzzy_match_one} -\alias{fuzzy_match_one} -\title{Fuzzy matches with a +/-1 window.} -\usage{ -fuzzy_match_one(x, y) -} -\arguments{ -\item{x}{A vector to be matched.} - -\item{y}{A vector to be matched against.} -} -\description{ -Not used but called the codes inside directly. -} -\examples{ -library(mzion) - -ans1 <- mzion:::fuzzy_match_one(c(74953, 74955), rep(74954, 2)) -ans2 <- mzion:::fuzzy_match_one(c(74953, 74955), 74954) - -stopifnot(identical(ans1, ans2)) -stopifnot(ans1 == c(TRUE, TRUE)) -} diff --git a/man/fuzzy_match_one2.Rd b/man/fuzzy_match_one2.Rd deleted file mode 100644 index f71780a..0000000 --- a/man/fuzzy_match_one2.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ms2frames.R -\name{fuzzy_match_one2} -\alias{fuzzy_match_one2} -\title{Fuzzy matches with a +/-1 window.} -\usage{ -fuzzy_match_one2(x, y) -} -\arguments{ -\item{x}{A vector to be matched.} - -\item{y}{A vector to be matched against.} -} -\description{ -No multiple dipping of \code{y} matches. 
A \code{y} value will be removed (or -became 0) if matched, -} -\examples{ -library(mzion) - -ans1 <- mzion:::fuzzy_match_one2(c(74953, 74955), rep(74954, 2)) -ans2 <- mzion:::fuzzy_match_one2(c(74953, 74955), 74954) - -stopifnot(identical(ans1, ans2)) -stopifnot(ans1 == c(FALSE, TRUE)) - -ans3 <- mzion:::fuzzy_match_one2(c(74953, 74955, 80000), c(74955, 80000)) - -## The x3 example from "find_ms2_bypep" -x <- c(-9185, -3369, -1973, -626, 59, 714, 3326, 7106, 7711, 7715, 8316, 8320, - 8916, 8920, 9511, 9515, 10102, 10688, 11211, 12945, 16807, 24001, 24481, - 31480, 32350, 32805, 37050, 37875, 42986, 53028, 53377, 53711, 56940, 58542, - 59172, 61310, 62482, 70941, 73801, 77575, 78046, 78047, 84120, 85881, 89313, - 91185, 96328, 101503, 102916, 104302, 113257, 113411, 116563, 118593, - 121336, 121405, 121474, 123450, 123841, 125826, 127823, 130750, 131786, - 131842, 131903, 134568, 135267, 135956, 139090, 139200, 146310, 146801, - 146902, 149442, 152081, 152174, 153544, 153635, 160913, 160995, 161078, - 162794, 162875, 163036, 163117, 163191, 163271, 168686, 169869, 169943, - 173741, 173812, 173951, 174856, 174922, 174990, 175059, 175128, 175197, - 175266) - -aas <- unlist(strsplit("SLAAEEEAAR", "")) - -y <- c(317.2022, 430.2863, 501.3234, 572.3605, 701.4031, - 830.4457, 959.4883, 1030.5254, 1101.5625, 1257.6636, - 175.1190, 246.1561, 317.1932, 446.2358, 575.2784, - 704.3210, 775.3581, 846.3952, 959.4793, 1046.5113) - -names(y) <- c(aas, rev(aas)) - -ppm_ms2 <- 13L -min_ms2mass <- 115L -d <- ppm_ms2/1E6 -y <- ceiling(log(y/min_ms2mass)/log(1+d)) - -ans <- mzion:::fuzzy_match_one2(x, y) -} diff --git a/man/gen_ms2ions_base.Rd b/man/gen_ms2ions_base.Rd index d555132..f2bcb13 100644 --- a/man/gen_ms2ions_base.Rd +++ b/man/gen_ms2ions_base.Rd @@ -27,8 +27,7 @@ gen_ms2ions_base( maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L + maxn_vmods_sitescombi_per_pep = 64L ) gen_ms2ions_a0_vnl0_fnl1( @@ 
-50,8 +49,7 @@ gen_ms2ions_a0_vnl0_fnl1( maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L + maxn_vmods_sitescombi_per_pep = 64L ) gen_ms2ions_a1_vnl0_fnl0( @@ -73,8 +71,7 @@ gen_ms2ions_a1_vnl0_fnl0( maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L + maxn_vmods_sitescombi_per_pep = 64L ) gen_ms2ions_a1_vnl0_fnl1( @@ -96,8 +93,7 @@ gen_ms2ions_a1_vnl0_fnl1( maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 3L, - digits = 4L + maxn_vnl_per_seq = 3L ) gen_ms2ions_a1_vnl1_fnl0( @@ -119,8 +115,7 @@ gen_ms2ions_a1_vnl1_fnl0( maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 3L, - digits = 4L + maxn_vnl_per_seq = 3L ) } \arguments{ @@ -189,9 +184,6 @@ To bypass the combinatorial of neutral losses, set \code{maxn_vnl_per_seq = number of combinatorial variable modifications per peptide sequence (per module). The ways include the permutations in neutral losses and modifications (e.g., \code{Acetyl (K) and TMT (K)}).} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ (1) "amods- tmod- vnl- fnl-", (2) "amods- tmod+ vnl- fnl-" diff --git a/man/hadd_primatches.Rd b/man/hadd_primatches.Rd index f63efa3..aef158b 100644 --- a/man/hadd_primatches.Rd +++ b/man/hadd_primatches.Rd @@ -39,10 +39,10 @@ hadd_primatches( \item{add_ms2ints}{Logical; if TRUE, adds the sequence of experimental MS2 intensity values (\code{pep_ms2_ints}).} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. 
At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The later would probably need more than +32G RAM if the number of modules is over 96.} \item{index_mgf_ms2}{A low-priority feature. Logical; if TRUE, converts upfrontly MS2 m-over-z values from numeric to integers as opposed to diff --git a/man/hcalc_tmtint.Rd b/man/hcalc_tmtint.Rd index 6f34441..2bda578 100644 --- a/man/hcalc_tmtint.Rd +++ b/man/hcalc_tmtint.Rd @@ -9,8 +9,7 @@ hcalc_tmtint( quant = "tmt10", ppm_reporters = 10L, idx = 1L, - out_path = NULL, - index_mgf_ms2 = FALSE + out_path = NULL ) } \arguments{ @@ -31,20 +30,6 @@ ions. The default is 10.} \item{idx}{The i-th chunk} \item{out_path}{A file path of outputs.} - -\item{index_mgf_ms2}{A low-priority feature. Logical; if TRUE, converts - upfrontly MS2 m-over-z values from numeric to integers as opposed to - \emph{in-situ} conversion during ion matches. The default is FALSE. The - \code{index_mgf_ms2 = TRUE} might be useful for very large MS files by - reducing RAM footprints. - - At \code{index_mgf_ms2 = TRUE}, the resolution of mass deltas between - theoretical and experimental MS2 m-over-z values is limited by the - \code{bin_width}, which is the ceiling half of the \code{ppm_ms2}. For - instance, the \code{bin_width} is 10 ppm at the default \code{ppm_ms2 = - 20}. Due to the low resolution in mass deltas at \code{index_mgf_ms2 = TRUE}, - the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, - pep_ms2_deltas_sd} are nullified in the outputs.} } \description{ Helper of calc_tmtint diff --git a/man/hms2match.Rd b/man/hms2match.Rd index 0c573c0..7b5ca1f 100644 --- a/man/hms2match.Rd +++ b/man/hms2match.Rd @@ -24,8 +24,7 @@ hms2match( min_ms2mass = 115L, index_mgf_ms2 = FALSE, by_modules = FALSE, - df0 = NULL, - digits = 4L + df0 = NULL ) } \arguments{ @@ -116,15 +115,12 @@ interrogation. 
The default is 110.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The later would probably need more than +32G RAM if the number of modules is over 96.} \item{df0}{An output template with zero rows} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ By MGF chunks diff --git a/man/hms2match_one.Rd b/man/hms2match_one.Rd new file mode 100644 index 0000000..0a30a3a --- /dev/null +++ b/man/hms2match_one.Rd @@ -0,0 +1,130 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ms2frames.R +\name{hms2match_one} +\alias{hms2match_one} +\title{Matches experimentals and theoreticals} +\usage{ +hms2match_one( + pep_mod_group, + mgths, + aa_masses, + FUN, + ms1vmods, + ms2vmods, + cl, + mod_indexes, + mgf_path, + out_path, + type_ms2ions = "by", + maxn_vmods_per_pep = 5L, + maxn_sites_per_vmod = 3L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, + maxn_vmods_sitescombi_per_pep = 64L, + minn_ms2 = 6L, + ppm_ms1 = 10L, + ppm_ms2 = 10L, + min_ms2mass = 115L, + index_mgf_ms2 = FALSE, + df0 = NULL +) +} +\arguments{ +\item{pep_mod_group}{The index of peptide modification groups.} + +\item{mgths}{Pairs of experimental and theoretical data.} + +\item{aa_masses}{An amino-acid look-up.} + +\item{FUN}{A function, e.g., \link{gen_ms2ions_base}, with an i-th module of +\code{aa_masses}.} + +\item{ms1vmods}{All possible labels of MS1 variable 
modifications with +an i-th \code{aa_masses}.} + +\item{ms2vmods}{All possible labels of MS2 variable modifications with +an i-t \code{aa_masses}.} + +\item{cl}{The value of clusters for parallel processes.} + +\item{mod_indexes}{Integer; the indexes of fixed and/or variable +modifications} + +\item{mgf_path}{A file path to a list of MGF files. The experimenter needs to + supply the files. + + The supported MGFs are in the formats of (1) MSConvert against \code{.raw} + from Thermo's Orbitrap or \code{.d} from Bruker's timsTOF Pro, (2) Thermo's + Proteome Discoverer or (3) Bruker's DataAnalysis. + + With MSConvert, the default \code{titleMaker} is required for correct + parsing (don't think it can be altered by users, but just in case).} + +\item{out_path}{A file path of outputs.} + +\item{type_ms2ions}{Character; the type of +\href{http://www.matrixscience.com/help/fragmentation_help.html}{ MS2 +ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- +and y-ions.} + +\item{maxn_vmods_per_pep}{A non-negative integer; the maximum number of +\code{Anywhere} (non-terminal) variable modifications per peptide. The +default is 5.} + +\item{maxn_sites_per_vmod}{A non-negative integer; the maximum number of + combinatorial \code{Anywhere} (non-terminal) variable modifications per + site in a peptide sequence. The default is 3. + + For instance, variable modifications of \code{Carbamyl (M)} and + \code{Oxidation (M)} both have site \code{M}. In order to have a + combination of two \code{Carbamyl (M)} and two \code{Oxidation (M)} being + considered, the value of \code{maxn_sites_per_vmod} needs to be four or + greater.} + +\item{maxn_fnl_per_seq}{A non-negative integer; the maximum number of +permutative neutral losses per peptide sequence for fixed modifications. 
To +bypass the combinatorial of neutral losses, set \code{maxn_fnl_per_seq = 1} +or \code{0}.} + +\item{maxn_vnl_per_seq}{A non-negative integer; the maximum number of +permutative neutral losses per peptide sequence for variable modifications. +To bypass the combinatorial of neutral losses, set \code{maxn_vnl_per_seq = +1} or \code{0}.} + +\item{maxn_vmods_sitescombi_per_pep}{A non-negative integer; the maximum +number of combinatorial variable modifications per peptide sequence (per +module). The ways include the permutations in neutral losses and +modifications (e.g., \code{Acetyl (K) and TMT (K)}).} + +\item{minn_ms2}{A positive integer; the minimum number of matched MS2 ions +for consideration as a hit. Counts of secondary ions, e.g. b0, b* etc., are +not part of the threshold.} + +\item{ppm_ms1}{A positive integer; the mass tolerance of MS1 species. The +default is 20.} + +\item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The +default is 20.} + +\item{min_ms2mass}{A positive integer; the minimum MS2 mass for +interrogation. The default is 110.} + +\item{index_mgf_ms2}{A low-priority feature. Logical; if TRUE, converts + upfrontly MS2 m-over-z values from numeric to integers as opposed to + \emph{in-situ} conversion during ion matches. The default is FALSE. The + \code{index_mgf_ms2 = TRUE} might be useful for very large MS files by + reducing RAM footprints. + + At \code{index_mgf_ms2 = TRUE}, the resolution of mass deltas between + theoretical and experimental MS2 m-over-z values is limited by the + \code{bin_width}, which is the ceiling half of the \code{ppm_ms2}. For + instance, the \code{bin_width} is 10 ppm at the default \code{ppm_ms2 = + 20}. 
Due to the low resolution in mass deltas at \code{index_mgf_ms2 = TRUE}, + the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, + pep_ms2_deltas_sd} are nullified in the outputs.} + +\item{df0}{An output template.} +} +\description{ +For a single module +} diff --git a/man/hpair_mgths.Rd b/man/hpair_mgths.Rd new file mode 100644 index 0000000..4557057 --- /dev/null +++ b/man/hpair_mgths.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ms2frames.R +\name{hpair_mgths} +\alias{hpair_mgths} +\title{Helper of \link{pair_mgftheos}.} +\usage{ +hpair_mgths( + ms1_offset = 0, + notch = NULL, + mgfs, + n_modules, + by_modules = TRUE, + mgf_path, + min_mass = 200L, + max_mass = 4500L, + ppm_ms1_bin = 10L, + .path_bin +) +} +\arguments{ +\item{ms1_offset}{The ms1 offset.} + +\item{notch}{The index assigned to an ms1_offset.} + +\item{mgfs}{MGF data.} + +\item{n_modules}{The number of modules (\code{length(aa_masses_all)}) or one} + +\item{by_modules}{Logical; if TRUE, results are saved with one mgf to one +theo module. At FALSE, results are saved with one mgf paired to all theo +modules} + +\item{mgf_path}{The path to MGF files} + +\item{min_mass}{A positive integer; the minimum precursor mass for +interrogation. The default is an arbitrarily low value (the primary guard +against low molecular-weight precursors is \code{min_len}).} + +\item{max_mass}{A positive integer; the maximum precursor mass for +interrogation.} + +\item{ppm_ms1_bin}{The tolerance in precursor mass error after mass binning.} + +\item{.path_bin}{The path to binned theoretical masses} +} +\description{ +Helper of \link{pair_mgftheos}. 
+} diff --git a/man/matchMS.Rd b/man/matchMS.Rd index 5cb4748..3e8c9d7 100644 --- a/man/matchMS.Rd +++ b/man/matchMS.Rd @@ -14,6 +14,7 @@ matchMS( fixedmods = c("TMT6plex (N-term)", "TMT6plex (K)", "Carbamidomethyl (C)"), varmods = c("Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + rm_dup_term_anywhere = TRUE, fixedlabs = NULL, varlabs = NULL, locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)"), @@ -41,6 +42,7 @@ matchMS( max_mass = 4500L, ppm_ms1 = 20L, n_13c = 0L, + ms1_notches = 0, par_groups = NULL, silac_mix = NULL, type_ms2ions = "by", @@ -68,7 +70,7 @@ matchMS( combine_tier_three = FALSE, max_n_prots = 60000L, use_ms1_cache = TRUE, - .path_cache = "~/mzion/.MSearches (1.2.6)/Cache/Calls", + .path_cache = "~/mzion/.MSearches (1.2.7)/Cache/Calls", .path_fasta = NULL, topn_ms2ions = 100L, topn_ms2ion_cuts = NA, @@ -138,6 +140,10 @@ c("uniprot_acc", "uniprot_id", "refseq_acc", "other"). See also \link{parse_unimod} for grammars of modification \code{title}, \code{position} and \code{site}.} +\item{rm_dup_term_anywhere}{Logical; if TRUE, removes combinations in +variable modifications with site(s) in positions of both terminal and +anywhere, e.g., "Gln->pyro-Glu (N-term = Q)" and "Deamidated (Q).} + \item{fixedlabs}{Character string(s) of fixed isotopic labels. See examples of SILAC for details. Can be but not typically used in standard alone searches of labeled residues.} @@ -261,15 +267,14 @@ interrogation.} \item{ppm_ms1}{A positive integer; the mass tolerance of MS1 species. The default is 20.} -\item{n_13c}{A non-negative integer; the maximum number of 13C off-sets for -consideration in MS1 masses. The default is 0 with no off-sets. -Peak-pickings by various MGF conversion tools may have attempted to adjust -precursor masses to the corresponding mono-isotopic masses in isotope -envelopes. 
Nevertheless, by setting \code{n_13c = 1}, some increases in the -number of PSMs may be readily achieved at a relatively small cost of search -time.} +\item{n_13c}{Number(s) of 13C off-sets in precursor masses, for example, over +the range of \code{-1:2}. The default is 0.} + +\item{ms1_notches}{A numeric vector; notches (off-sets) in precursor masses, +e.g., \code{c(-79.966331, -97.976896)} to account fo the loss of a phospho +group and phosphoric acid in precursor masses.} -\item{par_groups}{A low -priority feature. Parameter(s) of \code{matchMS} +\item{par_groups}{A low-priority feature. Parameter(s) of \code{matchMS} multiplied by sets of values in groups. Multiple searches will be performed separately against the parameter groups. For instance with one set of samples in SILAC light and the other in SILAC heavy, the experimenters may @@ -383,19 +388,19 @@ protein significance score cutoff of zero and thus are significant. Choose Note that the the value of \code{prot_n_pep} includes the counts of shared peptides.} -\item{method_prot_es_co}{A character string; the method to calculate the -cut-offs of protein enrichment scores. The value is in one of \code{ -"median", "mean", "max", "min"} with the default of \code{"median"}. For -instance at the default, the median of \code{peptide_score - -pep_score_cutoff} under a protein will be used to represent the threshold -of a protein enrichment score. For more conserved thresholds, the -statistics of \code{"max"} may be considered.} +\item{method_prot_es_co}{A low-priority setting. A character string; the +method to calculate the cut-offs of protein enrichment scores. The value is +in one of \code{"median", "mean", "max", "min"} with the default of +\code{"median"}. For instance at the default, the median of +\code{peptide_score - pep_score_cutoff} under a protein will be used to +represent the threshold of a protein enrichment score. 
For more conserved +thresholds, the statistics of \code{"max"} may be considered.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE. For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance has not yet been assessed.} \item{topn_mods_per_seq}{Positive integer; a threshold to discard variable modifications under the same peptide match with scores beyond the top-n. @@ -559,10 +564,10 @@ validation.} \item{svm_iters}{The number of iteration in SVM learning.} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The later would probably need more than +32G RAM if the number of modules is over 96.} \item{digits}{A non-negative integer; the number of decimal places to be used. 
The default is 4.} @@ -650,7 +655,6 @@ matchMS( out_path = "~/mzion/examples", ) - # TMT-16plex, phospho matchMS( fixedmods = c("TMTpro (N-term)", "TMTpro (K)", "Carbamidomethyl (C)"), @@ -663,6 +667,16 @@ matchMS( out_path = "~/mzion/examples", ) +# TMT-18plex +matchMS( + fixedmods = c("TMTpro (N-term)", "TMTpro (K)", "Carbamidomethyl (C)"), + varmods = c("Acetyl (Protein N-term)", "Oxidation (M)", + "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"), + quant = "tmt18", + fdr_type = "psm", + out_path = "~/mzion/examples", +) + # Bruker's PASEF matchMS( fixedmods = c("Carbamidomethyl (C)"), diff --git a/man/matchMS_par_groups.Rd b/man/matchMS_par_groups.Rd index edfc241..2218bc5 100644 --- a/man/matchMS_par_groups.Rd +++ b/man/matchMS_par_groups.Rd @@ -13,7 +13,7 @@ matchMS_par_groups( ) } \arguments{ -\item{par_groups}{A low -priority feature. Parameter(s) of \code{matchMS} +\item{par_groups}{A low-priority feature. Parameter(s) of \code{matchMS} multiplied by sets of values in groups. Multiple searches will be performed separately against the parameter groups. For instance with one set of samples in SILAC light and the other in SILAC heavy, the experimenters may diff --git a/man/mframes_adv.Rd b/man/mframes_adv.Rd index 17bd737..222a61b 100644 --- a/man/mframes_adv.Rd +++ b/man/mframes_adv.Rd @@ -22,8 +22,7 @@ mframes_adv( ppm_ms1 = 10L, ppm_ms2 = 10L, min_ms2mass = 115L, - index_mgf_ms2 = FALSE, - digits = 4L + index_mgf_ms2 = FALSE ) } \arguments{ @@ -107,9 +106,6 @@ interrogation. The default is 110.} 20}. Due to the low resolution in mass deltas at \code{index_mgf_ms2 = TRUE}, the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \value{ Matches to each MGF as a list elements. 
The length of the output is diff --git a/man/ms2ions_by_type.Rd b/man/ms2ions_by_type.Rd index 0d80b50..fe90e40 100644 --- a/man/ms2ions_by_type.Rd +++ b/man/ms2ions_by_type.Rd @@ -4,7 +4,7 @@ \alias{ms2ions_by_type} \title{Helper: switches among ion types for calculating MS2 masses.} \usage{ -ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions = "by", digits = 4L) +ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions = "by") } \arguments{ \item{aam}{A sequence of amino-acid residues with \emph{masses}. Residues @@ -19,9 +19,6 @@ corresponds to residues without masses).} \href{http://www.matrixscience.com/help/fragmentation_help.html}{ MS2 ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- and y-ions.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ Helper: switches among ion types for calculating MS2 masses. diff --git a/man/ms2match.Rd b/man/ms2match.Rd index f5b9763..04ab614 100644 --- a/man/ms2match.Rd +++ b/man/ms2match.Rd @@ -8,6 +8,7 @@ ms2match( mgf_path, aa_masses_all, out_path, + .path_bin, mod_indexes, type_ms2ions = "by", maxn_vmods_per_pep = 5L, @@ -25,6 +26,8 @@ ms2match( ppm_reporters = 10L, by_modules = TRUE, reframe_mgfs = FALSE, + n_13c = NULL, + ms1_notches = 0, fasta, acc_type, acc_pattern, @@ -39,8 +42,7 @@ ms2match( max_miss, index_mgf_ms2 = FALSE, first_search = FALSE, - .savecall = TRUE, - digits = 4L + .savecall = TRUE ) } \arguments{ @@ -59,6 +61,8 @@ fixed and variable modifications.} \item{out_path}{A file path of outputs.} +\item{.path_bin}{The file path to binned precursor masses.} + \item{mod_indexes}{Integer; the indexes of fixed and/or variable modifications.} @@ -128,13 +132,20 @@ plexes. For example, apply \code{tmt16} for \code{tmt12} provided a set of \item{ppm_reporters}{A positive integer; the mass tolerance of MS2 reporter ions. The default is 10.} -\item{by_modules}{Logical. Experimenting. 
At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The latter would probably need more than +32G RAM if the number of modules is over 96.} \item{reframe_mgfs}{Logical; if TRUE, recalculates the frame indexes of MGFs.} +\item{n_13c}{Number(s) of 13C off-sets in precursor masses, for example, over +the range of \code{-1:2}. The default is 0.} + +\item{ms1_notches}{A numeric vector; notches (off-sets) in precursor masses, +e.g., \code{c(-79.966331, -97.976896)} to account for the loss of a phospho +group and phosphoric acid in precursor masses.} + \item{fasta}{Character string(s) to the name(s) of fasta file(s) with prepended directory path. The experimenter needs to supply the files.} @@ -202,9 +213,6 @@ per peptide sequence for considerations. The default is 2.} or not.} \item{.savecall}{Logical; if TRUE, saves the current call.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ All files under `out_path` are removed if incur \code{calc_pepmasses} in the diff --git a/man/ms2match_all.Rd b/man/ms2match_all.Rd index f8de7bc..fd047e7 100644 --- a/man/ms2match_all.Rd +++ b/man/ms2match_all.Rd @@ -24,8 +24,7 @@ ms2match_all( ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - df0 = NULL, - digits = 4L + df0 = NULL ) } \arguments{ @@ -119,9 +118,6 @@ interrogation. The default is 110.} pep_ms2_deltas_sd} are nullified in the outputs.} \item{df0}{An output template} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. 
The default is 4.} } \description{ One MGF chunk and all modules diff --git a/man/ms2match_one.Rd b/man/ms2match_one.Rd index 5830dcc..2e49a4e 100644 --- a/man/ms2match_one.Rd +++ b/man/ms2match_one.Rd @@ -5,6 +5,7 @@ \title{Matches experimentals and theoreticals} \usage{ ms2match_one( + mgth, pep_mod_group, aa_masses, FUN, @@ -25,11 +26,12 @@ ms2match_one( ppm_ms2 = 10L, min_ms2mass = 115L, index_mgf_ms2 = FALSE, - df0 = NULL, - digits = 4L + df0 = NULL ) } \arguments{ +\item{mgth}{MGF and theoretical pairs} + \item{pep_mod_group}{The index of peptide modification groups} \item{aa_masses}{An amino-acid look-up} @@ -122,9 +124,6 @@ interrogation. The default is 110.} pep_ms2_deltas_sd} are nullified in the outputs.} \item{df0}{An output template} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ For a single module diff --git a/man/pair_mgftheo.Rd b/man/pair_mgftheos.Rd similarity index 81% rename from man/pair_mgftheo.Rd rename to man/pair_mgftheos.Rd index 77a4dc9..c3cce81 100644 --- a/man/pair_mgftheo.Rd +++ b/man/pair_mgftheos.Rd @@ -1,17 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ms2frames.R -\name{pair_mgftheo} -\alias{pair_mgftheo} +\name{pair_mgftheos} +\alias{pair_mgftheos} \title{Pairs MGF queries to theoretical MS1 masses and peptide sequences.} \usage{ -pair_mgftheo( +pair_mgftheos( mgf_path, n_modules, - .path_bin, + ms1_offsets = 0, by_modules = TRUE, - reframe_mgfs = FALSE, min_mass = 200L, + max_mass = 4500L, ppm_ms1_bin = 10L, + .path_bin, + reframe_mgfs = FALSE, first_search = FALSE ) } @@ -20,20 +22,26 @@ pair_mgftheo( \item{n_modules}{The number of modules (\code{length(aa_masses_all)}) or one} -\item{.path_bin}{The path to binned theoretical masses} +\item{ms1_offsets}{Off-sets in precursor masses (relative to the values in +the original MGFs).} \item{by_modules}{Logical; if TRUE, results are saved with one mgf to one theo module. 
At FALSE, results are saved with one mgf paired to all theo modules} -\item{reframe_mgfs}{Logical; if TRUE, recalculates the frame indexes of MGFs.} - \item{min_mass}{A positive integer; the minimum precursor mass for interrogation. The default is an arbitrarily low value (the primary guard against low molecular-weight precursors is \code{min_len}).} +\item{max_mass}{A positive integer; the maximum precursor mass for +interrogation.} + \item{ppm_ms1_bin}{The tolerance in precursor mass error after mass binning.} +\item{.path_bin}{The path to binned theoretical masses} + +\item{reframe_mgfs}{Logical; if TRUE, recalculates the frame indexes of MGFs.} + \item{first_search}{Logical; is the first search (for MGF mass calibration) or not.} } diff --git a/man/scalc_pepprobs.Rd b/man/scalc_pepprobs.Rd index d1a94b9..9364f07 100644 --- a/man/scalc_pepprobs.Rd +++ b/man/scalc_pepprobs.Rd @@ -31,11 +31,11 @@ and y-ions.} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} -\item{soft_secions}{Impacts on search performance not yet assessed. Logical; -if TRUE, collapses the intensities of secondary ions to primary ions even -when the primaries are absent. The default is FALSE. For instance, the -signal of \code{b5^*} will be ignored if its primary ion \code{b5} is not -matched.} +\item{soft_secions}{Logical; if TRUE, collapses the intensities of secondary +ions to primary ions even when the primaries are absent. The default is +FALSE. For instance, the signal of \code{b5^*} will be ignored if its +primary ion \code{b5} is not matched. The impacts of \code{soft_secions = +TRUE} on search performance have not yet been assessed.} \item{min_ms2mass}{A positive integer; the minimum MS2 mass for interrogation. The default is 110.} diff --git a/man/search_mgf.Rd b/man/search_mgf.Rd index ab4854a..cc27044 100644 --- a/man/search_mgf.Rd +++ b/man/search_mgf.Rd @@ -59,10 +59,10 @@ interrogation. 
The default is 110.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} -\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS -data by individual modules of combinatorial fixed and variable -modifications. If FALSE, search all modules together. The later would -probably need more than 32G RAM if the number of modules is over 96.} +\item{by_modules}{Not used. Logical. At the TRUE default, searches MS data by +individual modules of combinatorial fixed and variable modifications. If +FALSE, search all modules together. The latter would probably need more than +32G RAM if the number of modules is over 96.} } \description{ Matches an MGF query diff --git a/man/unique_mvmods.Rd b/man/unique_mvmods.Rd index acd86db..b780d7b 100644 --- a/man/unique_mvmods.Rd +++ b/man/unique_mvmods.Rd @@ -13,8 +13,7 @@ unique_mvmods( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, .ms1_vmodsets = NULL, - .base_ent = NULL, - digits = 4L + .base_ent = NULL ) } \arguments{ @@ -48,9 +47,6 @@ default is 5.} \item{.ms1_vmodsets}{Not used.} \item{.base_ent}{Not used.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \value{ Lists by residues in \code{amods}. diff --git a/man/vmods_elements.Rd b/man/vmods_elements.Rd index 17c549b..6750322 100644 --- a/man/vmods_elements.Rd +++ b/man/vmods_elements.Rd @@ -12,8 +12,7 @@ vmods_elements( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, .ms1_vmodsets = NULL, - .base_ent = NULL, - digits = 4L + .base_ent = NULL ) } \arguments{ @@ -46,9 +45,6 @@ default is 5.} \item{.ms1_vmodsets}{Not used.} \item{.base_ent}{Not used.} - -\item{digits}{A non-negative integer; the number of decimal places to be -used. The default is 4.} } \description{ The same residue, e.g. M, at different modifications, c("Carbamyl (M",