From 10651ee5e7dd49a9dcc16f418c1b1ef57b8b9069 Mon Sep 17 00:00:00 2001
From: Qiang Zhang <45829450+qzhang503@users.noreply.github.com>
Date: Tue, 27 Jun 2023 12:50:51 -0500
Subject: [PATCH] 1.2.8.2
Minor bug fixes and updates with precursor mass calibrations.
---
DESCRIPTION | 4 +-
R/mgfs.R | 40 +++++-
R/ms2_gen.R | 18 +--
R/ms2frames.R | 185 +++++++++++++++++--------
R/msmsmatches.R | 158 +++++++++++++++------
R/msmsmatches2.R | 296 ++++++++++++++++++++++++++++++++--------
R/quant2.R | 2 +-
R/scores.R | 102 ++++++++------
R/unimods.R | 200 ++++++++++++---------------
inst/extdata/custom.xml | 33 +++++
man/calc_pepfdr.Rd | 5 +-
man/calc_peploc.Rd | 5 +-
man/calc_pepscores.Rd | 4 -
man/calcpepsc.Rd | 4 -
man/calib_mgf.Rd | 2 +-
man/calib_ms1.Rd | 2 +-
man/check_locmods.Rd | 28 ++--
man/check_notches.Rd | 24 ++++
man/comb_ms1_offsets.Rd | 26 ++++
man/combine_fracs.Rd | 9 +-
man/find_ms1_offsets.Rd | 11 +-
man/find_optbs.Rd | 19 +++
man/find_optns.Rd | 19 +++
man/hadd_primatches.Rd | 5 +-
man/hcalc_tmtint.Rd | 4 -
man/hms2match.Rd | 23 ++++
man/hms2match_one.Rd | 30 +++-
man/matchMS.Rd | 31 ++++-
man/mprepBrukerMGF.Rd | 2 +-
man/ms2match.Rd | 27 +++-
man/ms2match_one.Rd | 11 +-
man/order_fracs3.Rd | 18 +++
man/post_calib.Rd | 33 +++++
man/prep_pepfdr_td.Rd | 7 +-
man/table_unimods.Rd | 9 +-
35 files changed, 1004 insertions(+), 392 deletions(-)
create mode 100644 man/check_notches.Rd
create mode 100644 man/comb_ms1_offsets.Rd
create mode 100644 man/find_optbs.Rd
create mode 100644 man/find_optns.Rd
create mode 100644 man/order_fracs3.Rd
create mode 100644 man/post_calib.Rd
diff --git a/DESCRIPTION b/DESCRIPTION
index 4935ee0..245b279 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: mzion
Type: Package
-Title: Database Searches of Proteomic Mass-spectrometrirc Data
-Version: 1.2.8.1
+Title: Proteomics Database Searches of Mass-spectrometrirc Data
+Version: 1.2.8.2
Authors@R:
person(given = "Qiang",
family = "Zhang",
diff --git a/R/mgfs.R b/R/mgfs.R
index dd65fec..730a6b5 100644
--- a/R/mgfs.R
+++ b/R/mgfs.R
@@ -1598,13 +1598,49 @@ prepBrukerMGF <- function (file = NULL, begin_offset = 5L, charge_offset = 5L)
message("Processing: ", file)
lines <- readLines(file)
+ hd <- lines[1:100]
+ ### Some MGFs may have additional "SCANS=" lines
+ pat_sc <- "SCANS"
+ sc <- .Internal(which(stringi::stri_startswith_fixed(hd, pat_sc)))
+
+ if (length(sc)) {
+ lscs <- .Internal(which(stringi::stri_startswith_fixed(lines, pat_sc)))
+ lines <- lines[-lscs]
+ rm(list = "lscs")
+ }
+ rm(list = c("pat_sc", "sc"))
+
+ ###PrecursorID:
+ pat_pr <- "###PrecursorID"
+ pr <- .Internal(which(stringi::stri_startswith_fixed(hd, pat_pr)))
+
+ if (length(pr)) {
+ lprs <- .Internal(which(stringi::stri_startswith_fixed(lines, pat_pr)))
+ lines <- lines[-lprs]
+ rm(list = "lprs")
+ }
+ rm(list = c("pat_pr", "pr"))
+
+ ###CCS:
+ pat_ccs <- "###CCS"
+ ccs <- .Internal(which(stringi::stri_startswith_fixed(hd, pat_ccs)))
+
+ if (length(ccs)) {
+ ccs <- .Internal(which(stringi::stri_startswith_fixed(lines, pat_ccs)))
+ lines <- lines[-ccs]
+ rm(list = "ccs")
+ }
+ rm(list = c("pat_ccs", "ccs"))
+
+ ## Processing
begins <- .Internal(which(stringi::stri_startswith_fixed(lines, "BEGIN IONS")))
ends <- .Internal(which(stringi::stri_endswith_fixed(lines, "END IONS")))
hdrs <- 1:(begins[1]-begin_offset-1L)
zls <- lines[begins+5L]
- oks <- grepl("^CHARGE", zls) & (zls != "CHARGE=1+")
+ # oks <- grepl("^CHARGE", zls) & (zls != "CHARGE=1+")
+ oks <- grepl("^CHARGE", zls) & (zls != "CHARGE=1+") & grepl("^###Mobility", lines[begins-1L])
rm(list = "zls")
b_oks <- begins[oks] - begin_offset
@@ -1623,7 +1659,7 @@ prepBrukerMGF <- function (file = NULL, begin_offset = 5L, charge_offset = 5L)
#' @param filepath A file path to MGF.
#' @param n_cores The number of CPU cores.
#' @export
-mprepBrukerMGF <- function (filepath, n_cores = 48L)
+mprepBrukerMGF <- function (filepath, n_cores = 32L)
{
message("Preparing Bruker's MGFs.")
diff --git a/R/ms2_gen.R b/R/ms2_gen.R
index 70aa9b1..899a2d4 100644
--- a/R/ms2_gen.R
+++ b/R/ms2_gen.R
@@ -100,8 +100,6 @@ gen_ms2ions_base <- function (aa_seq = NULL, ms1_mass = NULL,
mod_indexes = NULL,
type_ms2ions = "by", maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L,
-
- # dummy
maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L,
maxn_vmods_sitescombi_per_pep = 64L)
{
@@ -182,11 +180,13 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL,
maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L,
maxn_fnl_per_seq = 3L,
-
- # dummy
maxn_vnl_per_seq = 3L,
maxn_vmods_sitescombi_per_pep = 64L)
{
+ # cannot distinguish a neutral loss is in-source or in-MS2:
+ # "+ms1_offset and +ms2_nl" versus "-ms1_offset and -ms2_nl"
+ # if (ms1_offset != 0) maxn_vnl_per_seq = 1L
+
if (maxn_fnl_per_seq < 2L)
return(
gen_ms2ions_base(aa_seq = aa_seq, ms1_mass = ms1_mass,
@@ -478,10 +478,8 @@ gen_ms2ions_a1_vnl0_fnl0 <- function (aa_seq, ms1_mass = NULL, aa_masses = NULL,
mod_indexes = NULL, type_ms2ions = "by",
maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L,
-
- # dummy
- maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L,
-
+ maxn_fnl_per_seq = 3L,
+ maxn_vnl_per_seq = 3L,
maxn_vmods_sitescombi_per_pep = 64L)
{
aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE))
@@ -770,8 +768,6 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL,
maxn_sites_per_vmod = 3L,
maxn_vmods_sitescombi_per_pep = 64L,
maxn_fnl_per_seq = 3L,
-
- # dummy
maxn_vnl_per_seq = 3L)
{
if (maxn_fnl_per_seq < 2L)
@@ -1146,8 +1142,6 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL,
maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L,
maxn_vmods_sitescombi_per_pep = 64L,
-
- # dummy
maxn_fnl_per_seq = 3L,
maxn_vnl_per_seq = 3L)
{
diff --git a/R/ms2frames.R b/R/ms2frames.R
index 4a44468..8bcb8d7 100644
--- a/R/ms2frames.R
+++ b/R/ms2frames.R
@@ -19,7 +19,7 @@ pair_mgftheos <- function (mgf_path, n_modules, ms1_offsets = 0,
message("Pairing experimental and theoretical data.")
tempfiles <- if (by_modules)
- list.files(mgf_path, pattern = "^expttheo_", full.names = TRUE)
+ list.files(mgf_path, pattern = "^expt|^theo", full.names = TRUE)
else
list.files(mgf_path, pattern = "^mgftheo_", full.names = TRUE)
@@ -33,6 +33,9 @@ pair_mgftheos <- function (mgf_path, n_modules, ms1_offsets = 0,
# data thinning for MGF calibrations
if (first_search) {
mgfs <- lapply(mgfs, function (x) {
+ if ((fct <- ceiling(nrow(x)/50000L)) == 1L)
+ return(x)
+
min_mgfmass <- min(x$ms1_mass, na.rm = TRUE)
max_mgfmass <- max(x$ms1_mass, na.rm = TRUE)
oks_min <- with(x, ms1_mass <= min_mgfmass + 10L)
@@ -41,20 +44,24 @@ pair_mgftheos <- function (mgf_path, n_modules, ms1_offsets = 0,
mgfa <- x[oks_max, ]
mgfb <- x[oks_min, ]
mgfc <- x[!(oks_max | oks_min), ]
- rows <- (1:nrow(mgfc)) %% 10L == 1L
+ rows <- (1:nrow(mgfc)) %% fct == 1L
dplyr::bind_rows(mgfa, mgfc[rows, ], mgfb)
})
}
- ## output expttheo_1_1.rds: expttheo_module[_notch].rds
mgfs <- dplyr::bind_rows(mgfs)
- mapply(hpair_mgths, ms1_offsets, seq_along(ms1_offsets),
+ notches <- seq_along(ms1_offsets)
+
+ mapply(hpair_mgths, ms1_offsets, notches,
MoreArgs = list(
mgfs = mgfs, n_modules = n_modules, by_modules = by_modules,
mgf_path = mgf_path, min_mass = min_mass, max_mass = max_mass,
ppm_ms1_bin = ppm_ms1_bin, .path_bin = .path_bin),
SIMPLIFY = FALSE, USE.NAMES = FALSE)
+
+ qs::qsave(data.frame(ms1_offset = ms1_offsets, notch = notches),
+ file.path(mgf_path, "notches.rds"))
invisible(NULL)
}
@@ -125,7 +132,7 @@ hpair_mgths <- function (ms1_offset = 0, notch = NULL, mgfs, n_modules,
}
# rm(list = c("theos", "mfrs", "thfrs", "mins", "maxs"))
- # (2) removes mgfs[[i]] not be found in anstheo[[i]]
+ # (2) removes mgfs[[i]] not found in any anstheo[[i]]
for (i in seq_along(mgfs)) {
mi <- mgfs[[i]]
fi <- names(mi)
@@ -138,10 +145,9 @@ hpair_mgths <- function (ms1_offset = 0, notch = NULL, mgfs, n_modules,
}
# rm(list = c("mi", "ti", "fi", "oks"))
- if (length(mgfs) == 1L && !length(mgfs[[1]])) {
+ if (length(mgfs) == 1L && !length(mgfs[[1]]))
mgfs[1] <- list(NULL)
- }
-
+
# (3) removes unused frames of `anstheo`
# (mgfs determines the length of each anstheo[[i]];
# more effective to first generate all bracketed mgfs indexes and apply
@@ -158,9 +164,13 @@ hpair_mgths <- function (ms1_offset = 0, notch = NULL, mgfs, n_modules,
# (5) outputs (in chunks)
if (by_modules) {
+ qs::qsave(mgfs,
+ file.path(mgf_path, paste0("expt_", 0, "_", notch, ".rds")),
+ preset = "fast")
+
for (i in seq_len(n_modules))
- qs::qsave(list(mgf_frames = mgfs, theopeps = anstheo[[i]]),
- file.path(mgf_path, paste0("expttheo_", i, "_", notch, ".rds")),
+ qs::qsave(anstheo[[i]],
+ file.path(mgf_path, paste0("theo_", i, "_", notch, ".rds")),
preset = "fast")
}
else {
@@ -173,24 +183,7 @@ hpair_mgths <- function (ms1_offset = 0, notch = NULL, mgfs, n_modules,
# next version: note that mgfs are the same for different modules
# also save by fractions -> avoid sendData in parallel by read from disk
- if (FALSE) {
- if (by_modules) (
- # expt_notch_fraction.rds
- # theo_module_notch_fraction.rds
- mapply(function (data, frc) {
- fi <- file.path(mgf_path, paste0("expt_", 0, "_", notch, "_", frc, ".rds"))
- qs::qsave(data, fi, preset = "fast")
- }, mgfs, seq_along(mgfs))
- )
- else {
- for (i in seq_along(mgfs))
- qs::qsave(list(mgf_frames = mgfs[[i]],
- theopeps = lapply(anstheo, `[[`, i)),
- file.path(mgf_path, paste0("mgftheo_", i, ".rds")),
- preset = "fast")
- }
- }
-
+
invisible(NULL)
}
@@ -212,6 +205,8 @@ hpair_mgths <- function (ms1_offset = 0, notch = NULL, mgfs, n_modules,
#' @inheritParams matchMS
#' @inheritParams pair_mgftheos
hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all,
+ ms1_neulosses = NULL, maxn_neulosses_fnl = 1L,
+ maxn_neulosses_vnl = 1L,
mod_indexes, mgf_path, out_path,
type_ms2ions = "by", maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L,
@@ -221,8 +216,10 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all,
min_ms2mass = 115L, index_mgf_ms2 = FALSE,
by_modules = FALSE, df0 = NULL)
{
- pat <- if (by_modules) "^expttheo" else "^mgftheo"
- mgths <- order_fracs(type = pat, tempdir = mgf_path, by_modules = by_modules)
+ pth <- if (by_modules) "theo" else "mgftheo"
+ pex <- if (by_modules) "expt" else "mgftheo"
+ theos <- order_fracs(type = pth, tempdir = mgf_path, by_modules = by_modules)
+ expts <- order_fracs(type = pex, tempdir = mgf_path, by_modules = by_modules)
message("\n=== MS2 ion searches started at ", Sys.time(), ". ===\n")
@@ -248,14 +245,18 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all,
if (by_modules) {
parallel::clusterExport(cl, c("frames_adv"), envir = environment(mzion::matchMS))
- for (i in seq_along(aa_masses_all))
+ for (i in seq_along(aa_masses_all)) {
hms2match_one(
pep_mod_group = i,
- mgths = mgths[[i]],
+ nms_theo = theos[[i]],
+ nms_expt = expts[[1]],
aa_masses = aa_masses_all[[i]],
FUN = funs_ms2[[i]],
ms1vmods = ms1vmods_all[[i]],
ms2vmods = ms2vmods_all[[i]],
+ ms1_neulosses = ms1_neulosses,
+ maxn_neulosses_fnl = maxn_neulosses_fnl,
+ maxn_neulosses_vnl = maxn_neulosses_vnl,
cl = cl,
mod_indexes = mod_indexes,
mgf_path = mgf_path,
@@ -269,6 +270,7 @@ hms2match <- function (aa_masses_all, funs_ms2, ms1vmods_all, ms2vmods_all,
minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2,
min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2,
df0 = df0)
+ }
}
else {
message("Check search progress at: ", logs)
@@ -334,7 +336,7 @@ ms2match_all <- function (mgth, aa_masses_all, funs_ms2, ms1vmods_all,
if (!length(mgf_frames)) {
qs::qsave(df0, file.path(out_path, "temp", out_name))
- return(df0)
+ return(NULL)
}
df <- mframes_adv(
@@ -1275,7 +1277,8 @@ search_mgf <- function (expt_mass_ms1 = NULL, expt_moverz_ms2 = NULL,
#' For a single module
#'
#' @param pep_mod_group The index of peptide modification groups.
-#' @param mgths Pairs of experimental and theoretical data.
+#' @param nms_expt Names of experimental files.
+#' @param nms_theo Names of theoretical files.
#' @param aa_masses An amino-acid look-up.
#' @param FUN A function, e.g., \link{gen_ms2ions_base}, with an i-th module of
#' \code{aa_masses}.
@@ -1286,8 +1289,9 @@ search_mgf <- function (expt_mass_ms1 = NULL, expt_moverz_ms2 = NULL,
#' @param cl The value of clusters for parallel processes.
#' @param df0 An output template.
#' @inheritParams hms2match
-hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN,
- ms1vmods, ms2vmods, cl,
+hms2match_one <- function (pep_mod_group, nms_theo, nms_expt, aa_masses, FUN,
+ ms1vmods, ms2vmods, cl, ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 1L, maxn_neulosses_vnl = 1L,
mod_indexes, mgf_path, out_path, type_ms2ions = "by",
maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L,
maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L,
@@ -1302,11 +1306,43 @@ hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN,
message("Matching against: ",
if (nchar(nm_vmods) == 0L) nm_fmods else paste0(nm_fmods, " | ", nm_vmods))
- df <- vector("list", length(mgths))
+ # subset notches: c("Phospho (S)", "Phospho (T)", "Phospho (Y)")
+ if (length(ms1_neulosses)) {
+ if (length(vmods_nl <- attr(aa_masses, "vmods_nl", exact = TRUE))) {
+ nls <- extract_umods(ms1_neulosses)
+ oksnl <- lapply(nls, function (x) length(x[["nl"]]) > 1L)
+ nls <- nls[unlist(oksnl)]
+
+ if (length(nls) && any(oks <- names(nls) %in% names(vmods_nl))) {
+ notches <- unique(unlist(lapply(nls[oks], `[[`, "nl")))
+ notches <- -round(notches, digits = 4)
+
+ df <- qs::qread(file.path(mgf_path, "notches.rds"))
+ allowed <- df[with(df, ms1_offset %in% notches), ]$notch # at least one
+
+ univ <- as.integer(gsub("^theo_\\d+_(\\d+)\\.rds", "\\1", nms_theo))
+ idxes <- univ %in% allowed
+
+ nms_theo <- nms_theo[idxes] # at least length-1 of offset-0
+ nms_expt <- nms_expt[idxes]
+ }
+ else {
+ # Oxidation (M)
+ nms_theo <- nms_theo[1]
+ nms_expt <- nms_expt[1]
+ }
+ }
+ else {
+ # Deamidated (N)
+ nms_theo <- nms_theo[1]
+ nms_expt <- nms_expt[1]
+ }
+ }
- for (i in seq_along(mgths)) {
- df[[i]] <- ms2match_one(
- mgths[[i]],
+ if ((len <- length(nms_theo)) == 1L) {
+ df <- ms2match_one(
+ nms_theo = nms_theo[[1]],
+ nms_expt = nms_expt[[1]],
pep_mod_group = pep_mod_group, aa_masses = aa_masses, FUN = FUN,
ms1vmods = ms1vmods, ms2vmods = ms2vmods, cl = cl,
mod_indexes = mod_indexes, mgf_path = mgf_path, out_path = out_path,
@@ -1320,9 +1356,49 @@ hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN,
min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2,
df0 = df0)
}
-
- df <- dplyr::bind_rows(df)
-
+ else {
+ df <- vector("list", len)
+
+ df[[1]] <- ms2match_one(
+ nms_theo = nms_theo[[1]],
+ nms_expt = nms_expt[[1]],
+ pep_mod_group = pep_mod_group, aa_masses = aa_masses, FUN = FUN,
+ ms1vmods = ms1vmods, ms2vmods = ms2vmods, cl = cl,
+ mod_indexes = mod_indexes, mgf_path = mgf_path, out_path = out_path,
+ type_ms2ions = type_ms2ions,
+ maxn_vmods_per_pep = maxn_vmods_per_pep,
+ maxn_sites_per_vmod = maxn_sites_per_vmod,
+ maxn_fnl_per_seq = maxn_fnl_per_seq,
+ maxn_vnl_per_seq = maxn_vnl_per_seq,
+ maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep,
+ minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2,
+ min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2,
+ df0 = df0)
+
+ # neutral losses: maxn_neulosses_vnl and maxn_neulosses_fnl
+ for (i in 2:len) {
+ df[[i]] <- ms2match_one(
+ nms_theo = nms_theo[[i]],
+ nms_expt = nms_expt[[i]],
+ pep_mod_group = pep_mod_group, aa_masses = aa_masses, FUN = FUN,
+ ms1vmods = ms1vmods, ms2vmods = ms2vmods, cl = cl,
+ mod_indexes = mod_indexes, mgf_path = mgf_path, out_path = out_path,
+ type_ms2ions = type_ms2ions,
+ maxn_vmods_per_pep = maxn_vmods_per_pep,
+ maxn_sites_per_vmod = maxn_sites_per_vmod,
+ maxn_fnl_per_seq = maxn_neulosses_fnl,
+ maxn_vnl_per_seq = maxn_neulosses_vnl,
+ maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep,
+ minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2,
+ min_ms2mass = min_ms2mass, index_mgf_ms2 = index_mgf_ms2,
+ df0 = df0)
+ }
+
+ empties <- unlist(lapply(df, is.null))
+ df <- df[!empties]
+ df <- dplyr::bind_rows(df)
+ }
+
out_nm <- file.path(out_path, "temp", paste0("ion_matches_", pep_mod_group, ".rds"))
if (is.null(df)) {
@@ -1345,9 +1421,7 @@ hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN,
pep_scan_num = scan_num,
pep_exp_z = ms1_charge,
pep_ms2_moverzs = ms2_moverz,
- pep_ms2_ints = ms2_int,
- # pep_frame = frame,
- )
+ pep_ms2_ints = ms2_int, )
# df[["pep_scan_num"]] <- as.character(df[["pep_scan_num"]])
df <- reloc_col_after(df, "raw_file", "scan_num")
df <- reloc_col_after(df, "pep_mod_group", "raw_file")
@@ -1355,11 +1429,12 @@ hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN,
}
-#' Matches experimentals and theoreticals
+#' Matches experimentals and theoreticals.
#'
-#' For a single module
+#' For a single module and single notch.
#'
-#' @param mgth MGF and theoretical pairs
+#' @param nms_expt Names of experimental files.
+#' @param nms_theo Names of theoretical files.
#' @param pep_mod_group The index of peptide modification groups
#' @param aa_masses An amino-acid look-up
#' @param FUN A function, e.g., \link{gen_ms2ions_base}, with an i-th module of
@@ -1371,7 +1446,7 @@ hms2match_one <- function (pep_mod_group, mgths, aa_masses, FUN,
#' @param cl The value of clusters for parallel processes
#' @param df0 An output template
#' @inheritParams hms2match
-ms2match_one <- function (mgth, pep_mod_group, aa_masses, FUN,
+ms2match_one <- function (nms_theo, nms_expt, pep_mod_group, aa_masses, FUN,
ms1vmods, ms2vmods, cl,
mod_indexes, mgf_path, out_path, type_ms2ions = "by",
maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L,
@@ -1381,11 +1456,9 @@ ms2match_one <- function (mgth, pep_mod_group, aa_masses, FUN,
min_ms2mass = 115L, index_mgf_ms2 = FALSE,
df0 = NULL)
{
- out_name <- gsub("^expttheo", "ion_matches", mgth)
- mgftheo <- qs::qread(file.path(mgf_path, mgth))
- mgf_frames <- mgftheo[["mgf_frames"]]
- theopeps <- mgftheo[["theopeps"]]
- rm(list = "mgftheo")
+ out_name <- gsub("^theo", "ion_matches", nms_theo)
+ mgf_frames <- qs::qread(file.path(mgf_path, nms_expt))
+ theopeps <- qs::qread(file.path(mgf_path, nms_theo))
len <- length(mgf_frames)
@@ -1438,6 +1511,8 @@ ms2match_one <- function (mgth, pep_mod_group, aa_masses, FUN,
FUN = FUN),
.scheduling = "dynamic")
+ empties <- unlist(lapply(df, is.null))
+ df <- df[!empties]
df <- dplyr::bind_rows(df)
}
diff --git a/R/msmsmatches.R b/R/msmsmatches.R
index 113d4ef..36c26bc 100644
--- a/R/msmsmatches.R
+++ b/R/msmsmatches.R
@@ -171,8 +171,25 @@
#' @param n_13c Number(s) of 13C off-sets in precursor masses, for example, over
#' the range of \code{-1:2}. The default is 0.
#' @param ms1_notches A numeric vector; notches (off-sets) in precursor masses,
-#' e.g., \code{c(-79.966331, -97.976896)} to account fo the loss of a phospho
-#' group and phosphoric acid in precursor masses.
+#' e.g., \code{c(-97.976896)} to account fo the loss of a phosphoric acid in
+#' precursor masses.
+#' @param ms1_neulosses Character string(s) specifying the neutral losses of
+#' precursors. The nomenclature is the same as those in argument
+#' \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+#' (Y)")}.
+#'
+#' The argument is a simplified (narrower) usage of argument
+#' \code{ms1_notches} with additional specificity in modifications and sites.
+#' @param maxn_neulosses_fnl A positive integer used in conjunction with
+#' arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+#' of fixed MS2 neutral losses to be considered when searching against peak
+#' lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+#' addition to 0).
+#' @param maxn_neulosses_vnl A positive integer used in conjunction with
+#' arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+#' of variable MS2 neutral losses to be considered when searching against peak
+#' lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+#' addition to 0).
#' @param par_groups A low-priority feature. Parameter(s) of \code{matchMS}
#' multiplied by sets of values in groups. Multiple searches will be performed
#' separately against the parameter groups. For instance with one set of
@@ -478,9 +495,9 @@
#' matchMS(
#' fixedmods = c("TMTpro (N-term)", "TMTpro (K)", "Carbamidomethyl (C)"),
#' varmods = c("Acetyl (Protein N-term)", "Oxidation (M)",
-#' "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"),
+#' "Deamidated (N)", "Deamidated (Q)",
+#' "Gln->pyro-Glu (N-term = Q)"),
#' quant = "tmt18",
-#' fdr_type = "psm",
#' out_path = "~/mzion/examples",
#' )
#'
@@ -659,6 +676,11 @@ matchMS <- function (out_path = "~/mzion/outs",
"Oxidation (M)", "Deamidated (N)",
"Gln->pyro-Glu (N-term = Q)"),
rm_dup_term_anywhere = TRUE,
+
+ ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 2L,
+ maxn_neulosses_vnl = 2L,
+
fixedlabs = NULL,
varlabs = NULL,
locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)"),
@@ -732,7 +754,7 @@ matchMS <- function (out_path = "~/mzion/outs",
min_ret_time = 0, max_ret_time = Inf,
calib_ms1mass = FALSE,
ppm_ms1calib = 20L,
-
+
add_ms2theos = FALSE, add_ms2theos2 = FALSE,
add_ms2moverzs = FALSE, add_ms2ints = FALSE,
@@ -798,7 +820,7 @@ matchMS <- function (out_path = "~/mzion/outs",
# modifications
fixedmods <- sort(fixedmods)
varmods <- sort(varmods)
- locmods <- check_locmods(fixedmods, varmods, locmods)
+ locmods <- check_locmods(locmods, fixedmods, varmods, ms1_neulosses)
db_ord <- order(fasta)
fasta <- fasta[db_ord]
@@ -820,6 +842,7 @@ matchMS <- function (out_path = "~/mzion/outs",
# numeric types
stopifnot(vapply(c(maxn_fasta_seqs, maxn_vmods_setscombi, maxn_vmods_per_pep,
maxn_sites_per_vmod, maxn_fnl_per_seq, maxn_vnl_per_seq,
+ ms1_notches, maxn_neulosses_fnl, maxn_neulosses_vnl,
maxn_vmods_sitescombi_per_pep,
min_len, max_len, max_miss, topn_ms2ions, minn_ms2,
min_mass, max_mass, min_ms2mass, max_ms2mass, n_13c,
@@ -854,6 +877,8 @@ matchMS <- function (out_path = "~/mzion/outs",
maxn_vmods_sitescombi_per_pep <- as.integer(maxn_vmods_sitescombi_per_pep)
maxn_fnl_per_seq <- as.integer(maxn_fnl_per_seq)
maxn_vnl_per_seq <- as.integer(maxn_vnl_per_seq)
+ maxn_neulosses_fnl <- as.integer(maxn_neulosses_fnl)
+ maxn_neulosses_vnl <- as.integer(maxn_neulosses_vnl)
min_len <- as.integer(min_len)
max_len <- as.integer(max_len)
max_miss <- as.integer(max_miss)
@@ -880,8 +905,9 @@ matchMS <- function (out_path = "~/mzion/outs",
stopifnot(min_len >= 1L, max_len >= min_len, max_miss <= 10L, minn_ms2 >= 2L,
min_mass >= 1L, max_mass >= min_mass,
min_ms2mass >= 1L, max_ms2mass > min_ms2mass,
- maxn_vmods_sitescombi_per_pep >= 2L,
- noenzyme_maxn >= 0L,
+ maxn_vmods_sitescombi_per_pep >= 2L, noenzyme_maxn >= 0L,
+ maxn_fnl_per_seq >= 0L, maxn_vnl_per_seq >= 0L,
+ maxn_neulosses_fnl >= 0L, maxn_neulosses_vnl >= 0L,
maxn_vmods_per_pep >= maxn_sites_per_vmod, max_n_prots > 1000L,
min_ms1_charge >= 1L, max_ms1_charge >= min_ms1_charge,
min_scan_num >= 1L, max_scan_num >= min_scan_num,
@@ -1034,9 +1060,19 @@ matchMS <- function (out_path = "~/mzion/outs",
rm(list = c("oks"))
# TMT
- check_tmt_pars(fixedmods, varmods, quant)
+ check_tmt_pars(fixedmods, varmods, quant)
+
+ # MS1 off-sets
+ if (!all(ms1_neulosses %in% varmods))
+ stop("Not all `ms1_neulosses` found in `varmods`.")
+
+ check_notches(ms1_notches = ms1_notches, ms1_neulosses = ms1_neulosses)
+ ms1_offsets <- find_ms1_offsets(n_13c = n_13c, ms1_notches = ms1_notches)
+ is_notched <- length(unique(c(ms1_offsets, ms1_neulosses))) > 1L
# system paths
+ homedir <- find_dir("~")
+
if (is.null(.path_cache)) {
.path_cache <- "~/mzion/.MSearches/Cache/Calls/"
}
@@ -1049,6 +1085,9 @@ matchMS <- function (out_path = "~/mzion/outs",
.path_fasta <- create_dir(.path_fasta)
.path_ms1masses <- create_dir(file.path(.path_fasta, "ms1masses"))
+
+ fasta <- lapply(fasta, function (x) if (grepl("~", x)) gsub("~", homedir, x) else x)
+ fasta <- unlist(fasta, recursive = FALSE, use.names = FALSE)
# Output path
out_path <- create_dir(out_path)
@@ -1218,19 +1257,20 @@ matchMS <- function (out_path = "~/mzion/outs",
enzyme = enzyme,
out_path = out_path)
- if (reframe_mgfs)
- .path_bin_calib <-
- bin_ms1masses(res = res,
- min_mass = min_mass,
- max_mass = max_mass,
- min_len = min_len,
- max_len = max_len,
- ppm_ms1 = ppm_ms1calib,
- use_ms1_cache = use_ms1_cache,
- .path_cache = .path_cache,
- .path_ms1masses = .path_ms1masses,
- enzyme = enzyme,
- out_path = out_path)
+ .path_bin_calib <- if (reframe_mgfs)
+ bin_ms1masses(res = res,
+ min_mass = min_mass,
+ max_mass = max_mass,
+ min_len = min_len,
+ max_len = max_len,
+ ppm_ms1 = ppm_ms1calib,
+ use_ms1_cache = use_ms1_cache,
+ .path_cache = .path_cache,
+ .path_ms1masses = .path_ms1masses,
+ enzyme = enzyme,
+ out_path = out_path)
+ else
+ .path_bin
if (exists("res"))
rm(list = "res")
@@ -1269,10 +1309,8 @@ matchMS <- function (out_path = "~/mzion/outs",
## MSMS matches
if (is.null(bypass_ms2match <- dots$bypass_ms2match))
bypass_ms2match <- FALSE
-
- .time_stamp <- find_ms1_times(out_path)
-
- if (length(.time_stamp) == 1L) {
+
+ if (length(.time_stamp <- find_ms1_times(out_path)) == 1L) {
path_time <- file.path(.path_ms1masses, .time_stamp)
file_aams <- file.path(path_time, "aa_masses_all.rds")
file_mods <- file.path(path_time, "mod_indexes.txt")
@@ -1311,10 +1349,8 @@ matchMS <- function (out_path = "~/mzion/outs",
enzyme = enzyme, maxn_fasta_seqs = maxn_fasta_seqs,
maxn_vmods_setscombi = maxn_vmods_setscombi,
min_len = min_len, max_len = max_len, max_miss = max_miss,
- knots = 50L)
+ knots = 5L)
- ms1_offsets <- find_ms1_offsets(n_13c = n_13c, ms1_notches = ms1_notches)
-
if (!bypass_ms2match) {
if (min_ms2mass < 5L)
warning("Maybe out of RAM at \"min_ms2mass < 5L\".")
@@ -1342,6 +1378,9 @@ matchMS <- function (out_path = "~/mzion/outs",
index_mgf_ms2 = index_mgf_ms2,
by_modules = by_modules,
ms1_offsets = ms1_offsets,
+ ms1_neulosses = ms1_neulosses,
+ maxn_neulosses_fnl = maxn_neulosses_fnl,
+ maxn_neulosses_vnl = maxn_neulosses_vnl,
# dummy for argument matching
fasta = fasta,
@@ -1383,8 +1422,7 @@ matchMS <- function (out_path = "~/mzion/outs",
min_ms2mass = min_ms2mass,
index_mgf_ms2 = index_mgf_ms2,
tally_ms2ints = tally_ms2ints,
- ms1_offsets = ms1_offsets,
-
+
# dummies
mgf_path = mgf_path,
maxn_vmods_per_pep = maxn_vmods_per_pep,
@@ -1416,7 +1454,7 @@ matchMS <- function (out_path = "~/mzion/outs",
if (!bypass_primatches)
hadd_primatches(out_path = out_path,
- ms1_offsets = ms1_offsets,
+ is_notched = is_notched,
add_ms2theos = add_ms2theos,
add_ms2theos2 = add_ms2theos2,
add_ms2moverzs = add_ms2moverzs,
@@ -1433,7 +1471,7 @@ matchMS <- function (out_path = "~/mzion/outs",
fdr_type = fdr_type,
min_len = min_len,
max_len = max_len,
- ms1_offsets = ms1_offsets,
+ is_notched = is_notched,
max_pepscores_co = max_pepscores_co,
min_pepscores_co = min_pepscores_co,
enzyme = enzyme,
@@ -1473,7 +1511,7 @@ matchMS <- function (out_path = "~/mzion/outs",
calc_peploc(out_path = out_path,
mod_indexes = mod_indexes,
locmods = locmods,
- ms1_offsets = ms1_offsets,
+ is_notched = is_notched,
topn_mods_per_seq = topn_mods_per_seq,
topn_seqs_per_query = topn_seqs_per_query)
}
@@ -2057,26 +2095,42 @@ checkMGF <- function (mgf_path = NULL, grp_args = NULL, error = c("stop", "warn"
#' Coerced \code{fixedmods} not considered.
#'
#' @inheritParams matchMS
-check_locmods <- function (fixedmods, varmods, locmods)
+check_locmods <- function (locmods, fixedmods, varmods, ms1_neulosses = NULL)
{
if (!length(locmods))
return(NULL)
- oks <- locmods %in% c(fixedmods, varmods)
-
- if (!all(oks)) {
- warning("Some \"locmods\" not in \"varmods\" or \"fixedmods\": ",
+ if (!is.null(ms1_neulosses)) {
+ if (sum(bads <- !ms1_neulosses %in% locmods)) {
+ warning("\nPLEASE READ: \n\n",
+ "\"ms1_neulosses\": ", paste(ms1_neulosses[bads], collapse = ", "),
+ " not found in \"locmods\": ", paste(locmods, collapse =, ""),
+ "\n!!! Consider matching some of the \"locmods\" setting to",
+ " \"ms1_neulosses\". !!!\n")
+
+ if (FALSE) {
+ nls <- lapply(ms1_neulosses, find_unimod)
+ nlresids <- lapply(nls, `[[`, "position_site")
+ nlresids <- unlist(nlresids, use.names = FALSE, recursive = FALSE)
+
+ vs <- lapply(varmods, find_unimod)
+ vresids <- lapply(vs, `[[`, "position_site")
+ vresids <- unlist(vresids, use.names = FALSE, recursive = FALSE)
+ }
+ }
+ }
+
+ if (!all(oks <- locmods %in% c(fixedmods, varmods))) {
+ warning("Ignore \"locmods\" not in \"varmods\" or \"fixedmods\": ",
paste(locmods[!oks], collapse = ", "))
locmods <- locmods[oks]
}
-
+
if (!length(locmods))
return(NULL)
- vids <- which(varmods %in% locmods)
-
# locmods are only among fixedmods
- if (!length(vids))
+ if (!length(vids <- which(varmods %in% locmods)))
stop("No \"varmods\" matched to \"locmods\": ", paste(locmods, collapse = ", "))
vmods <- find_modps(varmods)
@@ -2149,3 +2203,21 @@ check_fdr_group <- function (fdr_group = c("base", "all", "top3"),
}
+#' Checks the compatibility between ms1_notches and ms1_neulosses.
+#'
+#' @inheritParams matchMS
+check_notches <- function (ms1_notches, ms1_neulosses)
+{
+ n_notches <- length(ms1_notches)
+ n_neulosses <- length(ms1_neulosses)
+
+ if (n_neulosses) {
+ cdn_1 <- n_notches == 1L && ms1_notches != 0
+ cdn_2 <- n_notches > 1L
+
+ if (cdn_1 || cdn_2)
+ stop("Not support simultaneous non-trival `ms1_notches` and `ms1_neulosses`.")
+ }
+}
+
+
diff --git a/R/msmsmatches2.R b/R/msmsmatches2.R
index 084590e..759180e 100644
--- a/R/msmsmatches2.R
+++ b/R/msmsmatches2.R
@@ -20,12 +20,14 @@
#' @import parallel
ms2match <- function (mgf_path, aa_masses_all, out_path, .path_bin,
mod_indexes, type_ms2ions = "by", maxn_vmods_per_pep = 5L,
- maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 64L,
- maxn_vnl_per_seq = 64L, maxn_vmods_sitescombi_per_pep = 64L,
+ maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L,
+ maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L,
minn_ms2 = 6L, ppm_ms1 = 20L, ppm_ms2 = 20L,
min_mass = 200L, max_mass = 4500L, min_ms2mass = 115L,
quant = "none", ppm_reporters = 10L,
by_modules = TRUE, reframe_mgfs = FALSE, ms1_offsets = 0,
+ ms1_neulosses = NULL, maxn_neulosses_fnl = 1L,
+ maxn_neulosses_vnl = 1L,
# dummies
fasta, acc_type, acc_pattern,
@@ -109,7 +111,9 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, .path_bin,
ppm_ms2_bin <- calc_threeframe_ppm(ppm_ms2)
pair_mgftheos(mgf_path = mgf_path, n_modules = length(aa_masses_all),
- ms1_offsets = ms1_offsets, by_modules = by_modules,
+ ms1_offsets = comb_ms1_offsets(ms1_offsets = ms1_offsets,
+ ms1_neulosses = ms1_neulosses),
+ by_modules = by_modules,
min_mass = min_mass, max_mass = max_mass,
ppm_ms1_bin = ppm_ms1_bin, .path_bin = .path_bin,
reframe_mgfs = reframe_mgfs, first_search = first_search)
@@ -140,21 +144,28 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, .path_bin,
maxn_sites_per_vmod = maxn_sites_per_vmod)
ms2vmods_all <- lapply(ms1vmods_all, lapply, make_ms2vmods)
- # Searches
- df0 <- tibble::tibble(scan_title = integer(), ms1_moverz = numeric(),
- ms1_mass = numeric(), ms1_int = numeric(),
- ms1_charge = character(), ret_time = numeric(),
- scan_num = character(), raw_file = integer(),
- ms2_moverz = list(list()),
- ms2_int = list(list()),
- ms2_n = integer(), frame = numeric(),
+ df0 <- tibble::tibble(scan_title = integer(), raw_file = integer(),
+ pep_mod_group = integer(), pep_exp_mz = numeric(),
+ pep_exp_mr = numeric(), pep_tot_int = numeric(),
+ pep_exp_z = numeric(), pep_ret_range = numeric(),
+ pep_scan_num = character(),
+ pep_ms2_moverzs = list(list()),
+ pep_ms2_ints = list(list()),
+ pep_n_ms2 = integer(),
+ rptr_moverz = list(list()),
+ rptr_int = list(list()),
+ pep_ms1_offset = numeric(),
matches = list(list()),
- pep_isdecoy = logical())
+ pep_fmod = character(),
+ pep_vmod = character())
hms2match(aa_masses_all = aa_masses_all,
funs_ms2 = funs_ms2,
ms1vmods_all = ms1vmods_all,
ms2vmods_all = ms2vmods_all,
+ ms1_neulosses = ms1_neulosses,
+ maxn_neulosses_fnl = maxn_neulosses_fnl,
+ maxn_neulosses_vnl = maxn_neulosses_vnl,
mod_indexes = mod_indexes,
mgf_path = mgf_path,
out_path = out_path,
@@ -235,7 +246,7 @@ reverse_seqs <- function (seqs)
#' @param reframe_mgfs Logical; if TRUE, recalculates the frame indexes of MGFs
#' @param knots The number of knots for spline fits.
#' @inheritParams matchMS
-calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL,
+calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL, out_path = NULL,
.path_bin, mod_indexes = NULL, type_ms2ions = "by",
maxn_vmods_per_pep = 5L,maxn_sites_per_vmod = 3L,
maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L,
@@ -248,7 +259,7 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL,
acc_pattern = NULL, topn_ms2ions = 100L,
fixedmods = NULL, varmods = NULL, enzyme = "trypsin_p",
maxn_fasta_seqs = 200000L, maxn_vmods_setscombi = 512L,
- min_len = 7L, max_len = 40L, max_miss = 2L, knots = 50L)
+ min_len = 7L, max_len = 40L, max_miss = 2L, knots = 5L)
{
on.exit(
if (exists(".savecall", envir = fun_env)) {
@@ -280,12 +291,9 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL,
return(NULL)
}
- # may need to delete mgf_queries_[...].rds when changing, e.g., from
- # ppm_ms1 = 20 to 10; or save a copy of the original mgf_queries
-
## the first search
tempdir <- file.path(out_path, "temp")
- pat_th <- if (by_modules) "^expttheo_\\d+.*\\.rds$" else "^mgftheo_\\d+.*\\.rds$"
+ pat_th <- if (by_modules) "^(theo|expt)_\\d+.*\\.rds$" else "^mgftheo_\\d+.*\\.rds$"
pat_im <- "^ion_matches_\\d+.*\\.rds$"
fs_th <- list.files(mgf_path, pattern = pat_th, full.names = TRUE)
fs_im <- list.files(tempdir, pattern = pat_im, full.names = TRUE)
@@ -307,25 +315,29 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL,
fi_mi2 <- file.path(out_path, "Calls", "mod_indexes.txt")
file.rename(fi_aa, fi_aa2)
file.rename(fi_mi, fi_mi2)
-
+
ms2match(mgf_path = mgf_path,
aa_masses_all = aa_masses_all,
out_path = out_path,
.path_bin = .path_bin,
mod_indexes = mod_indexes,
type_ms2ions = type_ms2ions,
- maxn_vmods_per_pep = maxn_vmods_per_pep,
- maxn_sites_per_vmod = maxn_sites_per_vmod,
- maxn_fnl_per_seq = maxn_fnl_per_seq,
- maxn_vnl_per_seq = maxn_vnl_per_seq,
- maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep,
+ maxn_vmods_per_pep = 1L,
+ maxn_sites_per_vmod = 1L,
+ maxn_fnl_per_seq = 1L,
+ maxn_vnl_per_seq = 1L,
+ ms1_offsets = 0,
+ ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 1L,
+ maxn_neulosses_vnl = 1L,
+ maxn_vmods_sitescombi_per_pep = 1L,
minn_ms2 = minn_ms2,
ppm_ms1 = ppm_ms1,
ppm_ms2 = ppm_ms2,
min_mass = min_mass,
max_mass = max_mass,
min_ms2mass = min_ms2mass,
- quant = quant,
+ quant = "none",
ppm_reporters = ppm_reporters,
reframe_mgfs = reframe_mgfs,
index_mgf_ms2 = index_mgf_ms2,
@@ -345,19 +357,56 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL,
first_search = TRUE,
.savecall = FALSE)
+ calc_pepscores(topn_ms2ions = topn_ms2ions,
+ type_ms2ions = type_ms2ions,
+ target_fdr = .01,
+ min_len = min_len,
+ max_len = max_len,
+ ppm_ms2 = ppm_ms2,
+ soft_secions = FALSE,
+ out_path = out_path,
+ min_ms2mass = min_ms2mass,
+ index_mgf_ms2 = index_mgf_ms2,
+ tally_ms2ints = TRUE,
+
+ # dummies
+ mgf_path = mgf_path,
+ maxn_vmods_per_pep = 1L,
+ maxn_sites_per_vmod = 1L,
+ maxn_vmods_sitescombi_per_pep = 1L,
+ minn_ms2 = minn_ms2,
+ ppm_ms1 = ppm_ms1,
+ quant = quant,
+ ppm_reporters = ppm_reporters,
+ fasta = fasta,
+ acc_type = acc_type,
+ acc_pattern = acc_pattern,
+ fixedmods = fixedmods,
+ varmods = varmods,
+ enzyme = enzyme,
+ maxn_fasta_seqs = maxn_fasta_seqs,
+ maxn_vmods_setscombi = maxn_vmods_setscombi,
+ add_ms2theos = FALSE,
+ add_ms2theos2 = FALSE,
+ add_ms2moverzs = FALSE,
+ add_ms2ints = FALSE,
+ by_modules = by_modules,
+ digits = 4L)
+
file.rename(fi_aa2, fi_aa)
file.rename(fi_mi2, fi_mi)
## mass calibration
fs_mgf <- list.files(mgf_path, "^mgf_queries.*\\.rds$")
- fi_ion <- file.path(out_path, "temp", "ion_matches_1.rds")
-
+ fi_ion <- file.path(out_path, "temp", "prescores_1_1.rds")
+
if (!length(fs_mgf))
stop("No `mgf_queries` files found for calibrations.")
if (!file.exists(fi_ion))
stop("No `ion_matches` files found for calibrations.")
- df <- qs::qread(fi_ion)
+ df <- qs::qread(fi_ion)
+ df <- df[with(df, pep_prob <= .01), ]
if (!"raw_file" %in% names(df))
stop("Column not found in search results: `raw_file`")
@@ -414,9 +463,11 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL,
#' @inheritParams calib_mgf
calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL,
ppm_ms1 = 20L, min_mass = 200L, max_mass = 4500L,
- knots = 50L)
+ knots = 5L)
{
- mgfs <- qs::qread(file.path(mgf_path, filename))
+ n_row <- nrow(df)
+ knots <- max(min(floor(n_row/20L), knots), 4L)
+ mgfs <- qs::qread(file.path(mgf_path, filename))
# subsets by minn_ms2 and ms1_int
if (FALSE) {
@@ -432,41 +483,79 @@ calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL,
df <- df[minn_ok, ]
}
- # x[[1]]: no nested structures, as the first search is always
- # against the all-fixed modifications
- theo_ms1 <- lapply(df[["matches"]], function (x) {
- attr(x[[1]], "theo_ms1", exact = TRUE)
- })
- theo_ms1 <- .Internal(unlist(theo_ms1, recursive = FALSE, use.names = FALSE))
+ diff_ms1 <- (df[["pep_exp_mr"]] - df[["theo_ms1"]])/df[["theo_ms1"]] * 1E6
+ mdiff <- median(diff_ms1, na.rm = TRUE)/1E6
+
+ if (n_row <= 100L || mdiff <= 2e-6) {
+ mgfs[["ms1_mass"]] <- mgfs[["ms1_mass"]] - mdiff
+ post_calib(mgfs, min_mass, max_mass, mgf_path, filename)
+ .savecall <- TRUE
+ return(NULL)
+ }
- diff_ms1 <- (df[["pep_exp_mr"]] - theo_ms1)/theo_ms1 * 1E6
- ret_time <- df[["pep_ret_range"]]
+ ret_time <- df[["pep_ret_range"]]
ppm_ms1_bin <- calc_threeframe_ppm(ppm_ms1)
- fit_ns <- tryCatch(
- lm(diff_ms1 ~ splines::ns(ret_time, knots)),
- error = function(e) NA)
+ fit_ns <- find_optns(diff_ms1, ret_time, knots)
+ fit_bs <- find_optbs(diff_ms1, ret_time, knots)
+
+ if (all(is.na(fit_ns))) {
+ if (all(is.na(fit_bs))) {
+ mgfs[["ms1_mass"]] <- mgfs[["ms1_mass"]] - mdiff
+ post_calib(mgfs, min_mass, max_mass, mgf_path, filename)
+ .savecall <- TRUE
+ return(NULL)
+ }
+ else
+ fit_ns <- fit_bs
+ }
+ else if (all(is.na(fit_bs)))
+ fit_bs <- fit_ns
- fit_bs <- tryCatch(
- lm(diff_ms1 ~ splines::bs(ret_time, knots)),
- error = function(e) NA)
+ bad_ns <- anyNA(coef(fit_ns))
+ bad_bs <- anyNA(coef(fit_bs))
- res_ns <- if (class(fit_ns) == "lm") sum(resid(fit_ns)^2, na.rm = TRUE) else Inf
- res_bs <- if (class(fit_bs) == "lm") sum(resid(fit_bs)^2, na.rm = TRUE) else Inf
- fit <- if (res_ns <= res_bs) fit_ns else fit_bs
+ if (bad_ns) {
+ if (bad_bs) {
+ mgfs[["ms1_mass"]] <- mgfs[["ms1_mass"]] - mdiff
+ post_calib(mgfs, min_mass, max_mass, mgf_path, filename)
+ .savecall <- TRUE
+ return(NULL)
+ }
+ else
+ fit_ns <- fit_bs
+ }
+ else if (bad_bs)
+ fit_bs <- fit_ns
+ res_ns <- if (class(fit_ns) == "lm") mean(resid(fit_ns)^2, na.rm = TRUE) else Inf
+ res_bs <- if (class(fit_bs) == "lm") mean(resid(fit_bs)^2, na.rm = TRUE) else Inf
+ fit <- if (res_ns <= res_bs) fit_ns else fit_bs
+
# (keeps the original df$ms1_mass -> can later infer mass deltas)
# charges <- get_ms1charges(df[["ms1_charge"]])
# df[["ms1_moverz"]] <- (df[["ms1_mass"]] + 1.00727647 * charges)/charges
## Update mgf
rt <- mgfs[["ret_time"]]
- oks_le <- rt >= min(ret_time, na.rm = TRUE)
- oks_gr <- rt <= max(ret_time, na.rm = TRUE)
+ min_rt <- min(ret_time, na.rm = TRUE)
+ max_rt <- max(ret_time, na.rm = TRUE)
+
+ if ((min_rt2 <- min_rt * 1.05) < (max_rt2 <- max_rt / 1.05)) {
+ oks_le <- rt >= min_rt2
+ oks_gr <- rt <= max_rt2
+ }
+ else {
+ oks_le <- rt >= min_rt
+ oks_gr <- rt <= max_rt
+ }
+
oks <- oks_le & oks_gr
ms1err <- predict.lm(fit, newdata = data.frame(ret_time = rt[oks])) / 1E6
- ms1oks <- mgfs[["ms1_mass"]][oks]
- mgfs[["ms1_mass"]][oks] <- ms1oks - ms1oks * ms1err
+ mgfs[["ms1_mass"]][oks] <- mgfs[["ms1_mass"]][oks] * (1 - ms1err)
+
+ uls <- rt[!oks]
+ mgfs[["ms1_mass"]][uls] <- mgfs[["ms1_mass"]][uls] - mdiff
# beyond the boundary of RT
if (FALSE) {
@@ -481,27 +570,118 @@ calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL,
}
## update MGF
- mgfs <- mgfs %>%
- dplyr::arrange(ms1_mass) %>%
- dplyr::filter(ms1_mass >= min_mass, ms1_mass <= max_mass)
-
# charges <- get_ms1charges(mgfs[["ms1_charge"]])
# mgfs[["ms1_moverz"]] <- (mgfs[["ms1_mass"]] + 1.00727647 * charges)/charges
-
+ post_calib(mgfs, min_mass, max_mass, mgf_path, filename)
.savecall <- TRUE
+ invisible(NULL)
+}
+
+
+#' Post MS1 calibrations.
+#'
+#' @param mgfs MGF data.
+#' @param filename An MGF file name
+#' @inheritParams matchMS
+post_calib <- function (mgfs, min_mass, max_mass, mgf_path, filename)
+{
+ mgfs <- mgfs |>
+ dplyr::arrange(ms1_mass) |>
+ dplyr::filter(ms1_mass >= min_mass, ms1_mass <= max_mass)
+
qs::qsave(mgfs, file.path(mgf_path, filename), preset = "fast")
}
+#' Finds the optimal natural spline.
+#'
+#' @param diff_ms1 The differences between experimental and theoretical
+#' precursor masses.
+#' @param ret_time Retention timm.
+#' @param knots The number of knots.
+find_optns <- function (diff_ms1, ret_time, knots = 50L)
+{
+ if (knots <= 3L)
+ return(NA)
+
+ ans <- tryCatch(
+ lm(diff_ms1 ~ splines::ns(ret_time, knots)),
+ error = function(e) NA)
+
+ if (all(is.na(ans)))
+ return(NA)
+
+ if (anyNA(coef(ans)))
+ find_optns(diff_ms1, ret_time, floor(knots/1.5))
+ else
+ ans
+}
-#' Finds offsets in precursor masses.
+
+#' Finds the optimal natural spline.
#'
+#' @inheritParams find_optns
+find_optbs <- function (diff_ms1, ret_time, knots = 50L)
+{
+ if (knots <= 3L)
+ return(NA)
+
+ ans <- tryCatch(
+ lm(diff_ms1 ~ splines::bs(ret_time, knots)),
+ error = function(e) NA)
+
+ if (all(is.na(ans)))
+ return(NA)
+
+ if (anyNA(coef(ans)))
+ find_optbs(diff_ms1, ret_time, floor(knots/1.5))
+ else
+ ans
+}
+
+
+#' Finds off-sets in precursor masses.
+#'
#' @inheritParams matchMS
+#' @return A vector of mass off-sets.
find_ms1_offsets <- function (n_13c = 0L, ms1_notches = 0)
{
+ if (dups <- anyDuplicated(n_13c))
+ stop("At least one duplicated values in `n_13c`: ",
+ n_13c[dups])
+
+ if (dups <- anyDuplicated(ms1_notches))
+ stop("At least one duplicated values in `ms1_offsets`: ",
+ ms1_notches[dups])
+
offsets_13c <- if (length(n_13c)) n_13c * 1.00335483 else NULL
ms1_offsets <- unique(c(0, offsets_13c, ms1_notches))
- round(ms1_offsets, digits = 4L)
+ ms1_offsets <- round(ms1_offsets, digits = 4L)
+}
+
+
+#' Combines off-sets in precursor masses (notches and neutral losses).
+#'
+#' The return contains no information of Unimod titles and positions since it is
+#' only used for pairing with experimental MGF data.
+#'
+#' @param ms1_offsets Precursor mass off-sets (notches, not neutral losses).
+#' @inheritParams matchMS
+#' @return A vector of mass off-sets.
+comb_ms1_offsets <- function (ms1_offsets = 0, ms1_neulosses = NULL)
+{
+ if (dups <- anyDuplicated(ms1_neulosses))
+ stop("At least one duplicated values in `ms1_neulosses`: ",
+ ms1_neulosses[dups])
+
+ if ((!(nnl <- length(ms1_neulosses))) || (nnl == 1L && nnl == 0))
+ return(ms1_offsets)
+
+ nls <- extract_umods(ms1_neulosses)
+ nls <- unique(unlist(lapply(nls, `[[`, "nl")))
+ nls <- -nls[nls != 0]
+
+ round(unique(c(ms1_offsets, nls)), digits = 4L)
}
diff --git a/R/quant2.R b/R/quant2.R
index 4874c25..0836d12 100644
--- a/R/quant2.R
+++ b/R/quant2.R
@@ -5,7 +5,7 @@
#' @inheritParams matchMS
#' @inheritParams calc_pepscores
hcalc_tmtint <- function (df, quant = "tmt10", ppm_reporters = 10L, idx = 1L,
- ms1_offsets = 0, out_path = NULL)
+ out_path = NULL)
{
df <- df[, c("raw_file", "pep_mod_group", "pep_scan_num", "rptr_moverz",
"rptr_int")]
diff --git a/R/scores.R b/R/scores.R
index 310c3c9..a0fb244 100644
--- a/R/scores.R
+++ b/R/scores.R
@@ -598,8 +598,6 @@ calc_pepprobs_i <- function (df, topn_ms2ions = 100L, type_ms2ions = "by",
#' Calculates the scores of peptides.
#'
#' @param tally_ms2ints Logical; tally MS2 intensities or not.
-#' @param ms1_offsets Off-sets in precursor masses in relative to the original
-#' values in MGFs.
#' @inheritParams matchMS
#' @import parallel
calc_pepscores <- function (topn_ms2ions = 100L, type_ms2ions = "by",
@@ -608,7 +606,7 @@ calc_pepscores <- function (topn_ms2ions = 100L, type_ms2ions = "by",
soft_secions = FALSE,
out_path = "~/mzion/outs",
min_ms2mass = 115L, index_mgf_ms2 = FALSE,
- tally_ms2ints = TRUE, ms1_offsets = 0,
+ tally_ms2ints = TRUE,
mgf_path, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L,
maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L,
ppm_ms1 = 20L, quant = "none", ppm_reporters = 10,
@@ -749,7 +747,6 @@ calc_pepscores <- function (topn_ms2ions = 100L, type_ms2ions = "by",
d2 = d2,
index_mgf_ms2 = index_mgf_ms2,
tally_ms2ints = tally_ms2ints,
- ms1_offsets = ms1_offsets,
add_ms2theos = add_ms2theos,
add_ms2theos2 = add_ms2theos2,
add_ms2moverzs = add_ms2moverzs,
@@ -830,20 +827,49 @@ order_fracs <- function (type = "list_table", tempdir, by_modules = TRUE)
}
+#' Order fractions
+#'
+#' Three layers: module_notch_frac
+#'
+#' @param type The type of files.
+#' @param tempdir A temporary directory containing the files.
+#' @param by_modules Logical; if TRUE, performs searches by modules.
+order_fracs3 <- function (type = "list_table", tempdir, by_modules = TRUE)
+{
+ files <- if (by_modules)
+ list.files(tempdir, pattern = paste0("^", type, "_\\d+_\\d+_\\d+.*"))
+ else
+ list.files(tempdir, pattern = paste0("^", type, "_\\d+(_){0,1}\\d*.*"))
+
+ # all NA's if by_modules = FALSE
+ l1 <- as.integer(gsub(paste0("^", type, "_(\\d+).*"), "\\1", files))
+ l2 <- as.integer(gsub(paste0("^", type, "_\\d+_(\\d+).*"), "\\1", files))
+ l3 <- as.integer(gsub(paste0("^", type, "_\\d+_\\d+_(\\d+).*"), "\\1", files))
+
+ df <- data.frame(l1 = l1, l2 = l2, l3 = l3)
+ df <- dplyr::arrange(df, l1, l2, l3)
+ df$fs <- with(df, paste(l1, l2, l3, sep = "_"))
+ df$fs <- paste0(type, "_", df$fs, ".rds")
+
+ split(df$fs, df$l1)
+}
+
+
+
#' Combines results from fractions
#'
-#' @param files A list of file names
-#' @param tempdir A temporary directory containing the files
+#' @param files A list of file names.
+#' @param tempdir A temporary directory containing the files.
+#' @param is_notched Logical; is a search with MS1 notches or not.
#' @param sc_path An output path
-#' @inheritParams calc_pepscores
-combine_fracs <- function (files, tempdir, sc_path, ms1_offsets = 0)
+combine_fracs <- function (files, tempdir, sc_path, is_notched = FALSE)
{
out_nm <- gsub("_\\d+\\.rds$", ".rds", files[[1]])
df <- lapply(file.path(tempdir, files), qs::qread)
df <- dplyr::bind_rows(df)
# uniq_id does not contain information of pep_ms1_offset
- if (length(ms1_offsets) > 1L)
+ if (is_notched)
df <- df[!duplicated.default(df[["uniq_id"]]), ]
qs::qsave(df, file.path(sc_path, out_nm))
@@ -914,7 +940,7 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all,
topn_ms2ions = 100L, type_ms2ions = "by",
ppm_ms2 = 20L, soft_secions = FALSE, out_path = NULL,
min_ms2mass = 115L, d2 = 1E-5, index_mgf_ms2 = FALSE,
- tally_ms2ints = TRUE, ms1_offsets = 0,
+ tally_ms2ints = TRUE,
add_ms2theos = FALSE, add_ms2theos2 = FALSE,
add_ms2moverzs = FALSE, add_ms2ints = FALSE,
quant = "none", ppm_reporters = 10,
@@ -1042,7 +1068,6 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all,
quant = quant,
ppm_reporters = ppm_reporters,
idx = idx,
- ms1_offsets = ms1_offsets,
out_path = im_path)
qs::qsave(df[, cols_lt, drop = FALSE],
@@ -1059,9 +1084,10 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all,
#' Helper of \link{add_primatches}
#'
+#' @param is_notched Logical; is a search with MS1 notches or not.
#' @inheritParams matchMS
#' @inheritParams calc_pepscores
-hadd_primatches <- function (out_path = NULL, ms1_offsets = 0,
+hadd_primatches <- function (out_path = NULL, is_notched = FALSE,
add_ms2theos = FALSE, add_ms2theos2 = FALSE,
add_ms2moverzs = FALSE, add_ms2ints = FALSE,
by_modules = TRUE, index_mgf_ms2 = FALSE)
@@ -1119,7 +1145,7 @@ hadd_primatches <- function (out_path = NULL, ms1_offsets = 0,
}, ms_files, names(ms_files))
lapply(order_fracs("reporters", tempdir, by_modules),
- combine_fracs, tempdir, tempdir, ms1_offsets)
+ combine_fracs, tempdir, tempdir, is_notched)
message("Completed theoretical MS2 m/z and intensity values: ", Sys.time())
@@ -1653,12 +1679,12 @@ find_probco_valley <- function (prob_cos, guess = 12L)
#' Prepares target-decoy data.
#'
#' @param td A data frame of targets and decoys (for Percolator).
-#' @param only_notch_zero Logical; if TRUE, use only data at \code{ms1_offsets =
-#' 0}.
+#' @param is_notched Logical; is a search with MS1 notches or not.
+#' @param only_notch_zero Logical; if TRUE, use only data at the zero notch.
#' @inheritParams matchMS
prep_pepfdr_td <- function (td = NULL, out_path, enzyme = "trypsin_p",
nes_fdr_group = "base", fdr_group = "base",
- ms1_offsets = 0, only_notch_zero = TRUE)
+ is_notched = FALSE, only_notch_zero = TRUE)
{
files <- list.files(path = file.path(out_path, "temp"),
pattern = "^pepscores_", full.names = TRUE)
@@ -1672,8 +1698,8 @@ prep_pepfdr_td <- function (td = NULL, out_path, enzyme = "trypsin_p",
td <- dplyr::bind_rows(td)
}
- if (length(ms1_offsets) > 1L && only_notch_zero)
- td <- td[with(td, abs(pep_ms1_offset) <= 1e-4), ]
+ if (is_notched && only_notch_zero)
+ td <- td[with(td, pep_ms1_offset == 0), ]
cts <- dplyr::count(dplyr::group_by(td, "pep_mod_group"), pep_mod_group)
max_i <- cts$pep_mod_group[which.max(cts$n)[[1]]]
@@ -1771,8 +1797,7 @@ keep_pepfdr_best <- function (td, cols = c("pep_scan_num", "raw_file"))
#' @param target_fdr Numeric; the levels of false-discovery rate (FDR).
#' @param fdr_type Character string; the type of FDR for controlling.
#' @param fct_score A trivial factor converting p-values to scores.
-#' @param ms1_offsets Off-sets in precursor masses in relative to the original
-#' values in MGFs.
+#' @param is_notched Logical; is a search with MS1 notches or not.
#' @inheritParams matchMS
#' @examples
#' \donttest{
@@ -1787,7 +1812,7 @@ keep_pepfdr_best <- function (td, cols = c("pep_scan_num", "raw_file"))
#' }
#' }
calc_pepfdr <- function (target_fdr = .01, fdr_type = "protein",
- min_len = 7L, max_len = 40L, ms1_offsets = 0,
+ min_len = 7L, max_len = 40L, is_notched = FALSE,
max_pepscores_co = 50, min_pepscores_co = 0,
enzyme = "trypsin_p", fdr_group = "base",
nes_fdr_group = "base", fct_score = 10, out_path)
@@ -1798,7 +1823,7 @@ calc_pepfdr <- function (target_fdr = .01, fdr_type = "protein",
enzyme = enzyme,
nes_fdr_group = nes_fdr_group,
fdr_group = fdr_group,
- ms1_offsets = ms1_offsets,
+ is_notched = is_notched,
only_notch_zero = TRUE)
# back-compatibility to new column keys (e.g. scan_num -> pep_scan_num)
@@ -1814,7 +1839,9 @@ calc_pepfdr <- function (target_fdr = .01, fdr_type = "protein",
return(data.frame(pep_len = seqs, pep_prob_co = prob_cos))
}
- td <- keep_pepfdr_best(td)
+ # not cols = c("pep_scan_num", "raw_file", "pep_ms1_offset")
+ # as only use td at pep_ms1_offset == 0
+ td <- keep_pepfdr_best(td, cols = c("pep_scan_num", "raw_file"))
qs::qsave(td, file.path(out_path, "temp", "td_pepfdr.rds"), preset = "fast")
# ---
@@ -2192,11 +2219,11 @@ post_pepfdr <- function (prob_cos = NULL, out_path = NULL)
dplyr::mutate(pep_issig = ifelse(pep_prob <= pep_prob_co, TRUE, FALSE),
pep_adjp = p.adjust(pep_prob, "BH"))
- adjp_cos <- purrr::map_dbl(prob_cos$pep_prob_co, function (x) {
+ adjp_cos <- lapply(prob_cos$pep_prob_co, function (x) {
row <- which.min(abs(log10(td$pep_prob/x)))
td[row, ]$pep_adjp
- })
-
+ })
+ adjp_cos <- unlist(adjp_cos, recursive = FALSE, use.names = FALSE)
prob_cos <- dplyr::bind_cols(prob_cos, pep_adjp_co = adjp_cos)
td <- td |>
@@ -2719,13 +2746,12 @@ match_ex2th2 <- function (expt, theo, min_ms2mass = 115L, d = 1E-5,
#' @param out_path An output path.
#' @param mod_indexes Integer; the indexes of fixed and/or variable
#' modifications.
-#' @param ms1_offsets Off-sets in precursor masses in relative to the original
-#' values in MGFs.
+#' @param is_notched Logical; is a search with MS1 notches or not.
#' @inheritParams matchMS
#' @rawNamespace import(data.table, except = c(last, first, between, transpose,
#' melt, dcast))
calc_peploc <- function (x = NULL, out_path = NULL, mod_indexes = NULL,
- ms1_offsets = 0,
+ is_notched = FALSE,
locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)"),
topn_mods_per_seq = 3L, topn_seqs_per_query = 3L)
{
@@ -2757,7 +2783,7 @@ calc_peploc <- function (x = NULL, out_path = NULL, mod_indexes = NULL,
# finds the rank-1 pep_ms1_offset at each query_id
# keeps only the rank-1 pep_ms1_offset group at each query_id
- if (length(ms1_offsets) > 1L) {
+ if (is_notched) {
x[, query_id := paste(pep_isdecoy, pep_scan_num, raw_file, sep = ".")]
if (para) {
@@ -3054,24 +3080,18 @@ calcpeprank_3 <- function (x0)
#' @param x A data.table object
find_bestnotch <- function (x)
{
- x[, pep_rank0 := data.table::frank(-pep_score, ties.method = "min"),
+ # favors pep_ms1_offset == 0
+ x <- data.table::rbindlist(list(x[x$pep_ms1_offset == 0],
+ x[x$pep_ms1_offset != 0]),
+ use.names = FALSE)
+ x[, pep_rank0 := data.table::frank(-pep_score, ties.method = "first"),
by = list(query_id)]
# the best query at each query_id
g <- x[, c("pep_rank0", "query_id", "pep_ms1_offset")]
g <- g[g$pep_rank0 == 1L, ]
g[["pep_rank0"]] <- NULL
- g[, id := seq_len(.N), by = query_id]
- g <- g[g$id == 1L, ]
- g[["id"]] <- NULL
- if (FALSE) {
- if (length(us <- unique(g$pep_ms1_offset)) > 1L) {
- i <- if (length(ok <- which(abs(us) < 1e-4)) == 1L) ok else 1L
- g <- g[g$pep_ms1_offset == us[[i]], ]
- }
- }
-
g[["keep"]] <- TRUE
g[, uid := paste(query_id, pep_ms1_offset, sep = ".")]
g[["pep_ms1_offset"]] <- g[["query_id"]] <- NULL
diff --git a/R/unimods.R b/R/unimods.R
index 60efe02..47a0a8f 100644
--- a/R/unimods.R
+++ b/R/unimods.R
@@ -76,64 +76,58 @@ parse_unimod <- function (unimod = "Carbamyl (M)")
# (assumed) no space in `title`
# title <- gsub("(.*)\\s\\([^\\(]*\\)$", "\\1", unimod)
- title <- gsub("^([^ ]+?) .*", "\\1", unimod)
-
- pos_site <- unimod %>%
- gsub("^[^ ]+", "", .) %>%
- gsub("^[^\\(]+[\\(]*([^\\)]*)[\\)]*$", "\\1", .)
+ title <- gsub("^([^ ]+?) .*", "\\1", unimod)
+ pos_site <- gsub("^[^ ]+", "", unimod)
+ pos_site <- gsub("^[^\\(]+[\\(]*([^\\)]*)[\\)]*$", "\\1", pos_site)
if (grepl("=", pos_site)) {
- pos <- pos_site %>%
- gsub("^([^=]+?)[=].*", "\\1", .) %>%
- gsub("^[ ]*", "\\1", .) %>%
- gsub(" *$", "", .)
-
- site <- pos_site %>%
- gsub("^[^=]+?[=](.*)", "\\1", .) %>%
- gsub("^[ ]*", "\\1", .) %>%
- gsub(" *$", "", .)
+ pos <- gsub("^([^=]+?)[=].*", "\\1", pos_site)
+ pos <- gsub("^[ ]*", "\\1", pos)
+ pos <- gsub(" *$", "", pos)
+ site <- gsub("^[^=]+?[=](.*)", "\\1", pos_site)
+ site <- gsub("^[ ]*", "\\1", site)
+ site <- gsub(" *$", "", site)
}
else {
- pos <- "."
+ pos <- "."
site <- pos_site
}
- if (site == "") site = "."
+ if (site == "")
+ site = "."
if (site %in% c("Protein N-term", "Protein C-term",
"Anywhere N-term", "Anywhere C-term",
"N-term", "C-term")) {
- pos <- site
- site <- site %>% gsub("^(Protein|Anywhere) ", "", .)
+ pos <- site
+ site <- gsub("^(Protein|Anywhere) ", "", site)
}
# standardize `position`
pos <- gsub("^([NC]){1}-term", "Any \\1-term", pos)
- if (pos %in% c(".", "")) pos <- "Anywhere"
+ if (pos %in% c(".", ""))
+ pos <- "Anywhere"
pos_allowed <- c("Anywhere", "Protein N-term", "Protein C-term",
"Any N-term", "Any C-term")
if (! pos %in% pos_allowed)
stop("`pos` needs to be one of ",
- paste0("\n '", pos_allowed, collapse = "'"),
- "'")
-
+ paste0("\n '", pos_allowed, collapse = "'"), "'")
+
# standardize terminal sites
if (site == ".") {
- if (pos %in% c("Protein N-term", "Any N-term")) {
+ if (pos %in% c("Protein N-term", "Any N-term"))
site <- "N-term"
- }
- else if (pos %in% c("Protein C-term", "Any C-term")) {
+ else if (pos %in% c("Protein C-term", "Any C-term"))
site <- "C-term"
- }
}
if (pos == "Anywhere" && site == ".")
stop("'position' or 'site' cannot be both 'Anywhere'.")
- invisible(list(title = title, position = pos, site = site))
+ list(title = title, position = pos, site = site)
}
@@ -182,36 +176,26 @@ find_unimod <- function (unimod = "Carbamidomethyl (C)",
node_delta <- xml2::xml_find_all(this_mod, "umod:delta")
monomass <- as.numeric(xml2::xml_attr(node_delta, "mono_mass"))
- if (!(length(monomass) == 1L))
+ if (length(monomass) != 1L)
stop("The length of `mono_mass` is not one.")
# sites and positions
node_specs <- xml2::xml_find_all(this_mod, "umod:specificity")
attrs_specs <- xml2::xml_attrs(node_specs)
- sites <- attrs_specs %>%
- lapply(`[`, c("site")) %>%
- unlist()
-
- positions <- attrs_specs %>%
- lapply(`[`, c("position")) %>%
- unlist()
-
+ sites <- unlist(lapply(attrs_specs, `[`, "site"))
+ positions <- unlist(lapply(attrs_specs, `[`, "position"))
names(sites) <- positions
- idx_ps <- which(sites == site & positions == position)
+ idx_ps <- which(sites == site & positions == position)
+ sites <- sites[idx_ps]
+ empties <- unlist(lapply(sites, is.null))
+ sites <- sites[!empties]
- sites <- sites[idx_ps] %>%
- .[purrr::map_lgl(., function (x) !is.null(x))]
-
- local({
- len_sites <- length(sites)
-
- if (len_sites > 1L)
- stop("Multiple matches in site and position for '", unimod, "'.")
- else if (len_sites == 0L)
- stop("No matches in site and position for '", unimod, "'.")
- })
+ if ((len_sites <- length(sites)) > 1L)
+ stop("Multiple matches in site and position for '", unimod, "'.")
+ else if (len_sites == 0L)
+ stop("No matches in site and position for '", unimod, "'.")
# neutral loss
idxes_nl <- grep("NeutralLoss", node_specs)
@@ -219,18 +203,16 @@ find_unimod <- function (unimod = "Carbamidomethyl (C)",
if (length(idxes_nl)) {
nodes_nl <- xml2::xml_children(node_specs[idxes_nl])
-
- neulosses <- xml2::xml_attr(nodes_nl, "mono_mass") %>%
- as.numeric() %>%
- sort() # ensures the first is 0
+ # ensures the first is 0
+ neulosses <- sort(as.numeric(xml2::xml_attr(nodes_nl, "mono_mass")))
}
else
neulosses <- 0
- invisible(list(title = title,
- monomass = monomass,
- position_site = sites,
- nl = neulosses))
+ list(title = title,
+ monomass = monomass,
+ position_site = sites,
+ nl = neulosses)
}
@@ -261,51 +243,52 @@ hfind_unimod <- function (xml_files = c("master.xml", "custom.xml"), unimod)
stop("Modification not found: '", title, "'.\n",
"For example, use 'Acetyl' (title) instead of 'Acetylation' (full_name).")
- invisible(this_mod)
+ this_mod
}
#' Tabulates \href{https://www.unimod.org/}{Unimod} entries.
#'
-#' For convenience summary of the \code{title}, \code{site} and
-#' \code{position}.
+#' For convenience summary of the \code{title}, \code{site} and \code{position}.
#'
#' @param out_nm A name to outputs.
-#' @seealso \link{find_unimod}, \link{parse_unimod}.
+#' @seealso \link{find_unimod}, \link{parse_unimod}, \link{add_unimod},
+#' \link{remove_unimod}, \link{remove_unimod_title},
+#' \link{calc_unimod_compmass}.
#' @examples
#' \donttest{
#' library(mzion)
-#'
+#'
#' ans <- table_unimods()
-#'
-#' ## TMT-6, -10 and -11 plexes
+#'
+#' ## TMT-6, -10 and -11 plexes
#' # share the same Unimod entry at title "TMT6plex"
#' # (the same chemistry at tag mass 229.162932 Da)
#' ans[with(ans, title == "TMT6plex"), ]
#' this_mod1 <- parse_unimod("TMT6plex (Anywhere = K)")
-#'
+#'
#' # Convenience title, "TMT10plex", alias to "TMT6plex"
#' this_mod2 <- parse_unimod("TMT10plex (Anywhere = K)")
-#'
+#'
#' # Title "TMT11plex" alias to "TMT6plex"
#' this_mod3 <- parse_unimod("TMT11plex (Anywhere = K)")
-#'
+#'
#' ## TMT-16
#' ans[with(ans, title == "TMTpro"), ]
#' this_mod1 <- parse_unimod("TMTpro (Anywhere = K)")
-#'
+#'
#' # Both "TMTpro16" and "TMT16plex" alias to "TMTpro"
#' this_mod2 <- parse_unimod("TMTpro16 (Anywhere = K)")
#' this_mod3 <- parse_unimod("TMT16plex (Anywhere = K)")
-#'
+#'
#' ## Summary of TMT entries and alias
#' ans[with(ans, grepl("^TMT", title)), ]
-#'
-#'
+#'
+#'
#' ## Special characters in the title (e.g., "->")
#' ans[with(ans, grepl("^Gln->pyro", title)), ]
#' this_mod <- parse_unimod("Gln->pryo-Glu (N-term = Q)")
-#'
+#'
#' }
#' @export
table_unimods <- function (out_nm = "~/mzion/unimods.txt")
@@ -643,7 +626,7 @@ add_unimod <- function (header = c(title = "Foo", full_name = "Foo bar"),
}
local({
- if (!(length(site) == 1L))
+ if (length(site) != 1L)
stop("The length of `site` is not one.")
if (!(length(position) == 1L))
@@ -709,9 +692,10 @@ add_unimod <- function (header = c(title = "Foo", full_name = "Foo bar"),
local({
this_delta <- xml2::xml_find_all(this_mod, "umod:delta")
-
- stopifnot(length(this_delta) == 1L)
+ if (length(this_delta) != 1L)
+ stop("Multiple or no matches.")
+
this_mono_mass <- xml2::xml_attr(this_delta, "mono_mass")
this_avge_mass <- xml2::xml_attr(this_delta, "avge_mass")
this_composition <- xml2::xml_attr(this_delta, "composition")
@@ -759,9 +743,7 @@ add_unimod <- function (header = c(title = "Foo", full_name = "Foo bar"),
positions <- unlist(lapply(attrs_children, `[`, c("position")))
ok_sitepos <- which((sites == site) & (positions == position))
- len_sitepos <- length(ok_sitepos)
-
- if (len_sitepos > 1L)
+ if ((len_sitepos <- length(ok_sitepos)) > 1L)
stop("Multiple matches to `site = ", site, "` and ",
"`position = ", position, "` at ",
"`title = ", title, "`.\n",
@@ -1129,15 +1111,11 @@ remove_unimod <- function (header = c(title = "Foo", full_name = "Foo bar"),
# title
idx_title <- which(xml2::xml_attr(modifications, "title") == title)
- local({
- len_title <- length(idx_title)
-
- if (len_title > 1L)
- stop("Multiple matches to `", title, "`.\n",
- "Fix the redundancy from ", xml_file, ".")
- else if (!len_title)
- stop("Entry `", title, "` not found.")
- })
+ if ((len_title <- length(idx_title)) > 1L)
+ stop("Multiple matches to `", title, "`.\n",
+ "Fix the redundancy from ", xml_file, ".")
+ else if (!len_title)
+ stop("Entry `", title, "` not found.")
# `site` and `position`
this_mod <- modifications[[idx_title]]
@@ -1159,19 +1137,15 @@ remove_unimod <- function (header = c(title = "Foo", full_name = "Foo bar"),
positions <- unlist(lapply(attrs_mod_ch, `[`, c("position")))
ok_sitepos <- which((sites == site) & (positions == position))
- local({
- len_sitepos <- length(ok_sitepos)
-
- if (!len_sitepos)
- stop("No matches to `site = ", site, "` and ",
- "`position = ", position, "` at ",
- "`title = ", title, "`.\n")
- else if (len_sitepos > 1L)
- stop("Multiple matches to `site = ", site, "` and ",
- "`position = ", position, "` at ",
- "`title = ", title, "`.\n",
- "Fix the redundancy from ", xml_file, ".")
- })
+ if (!(len_sitepos <- length(ok_sitepos)))
+ stop("No matches to `site = ", site, "` and ",
+ "`position = ", position, "` at ",
+ "`title = ", title, "`.\n")
+ else if (len_sitepos > 1L)
+ stop("Multiple matches to `site = ", site, "` and ",
+ "`position = ", position, "` at ",
+ "`title = ", title, "`.\n",
+ "Fix the redundancy from ", xml_file, ".")
this_spec <- nodes_mod_ch[ok_sitepos]
@@ -1208,14 +1182,10 @@ remove_unimod <- function (header = c(title = "Foo", full_name = "Foo bar"),
neuloss_mono_masses <- unlist(lapply(attrs_neuloss, `[`, c("mono_mass")))
idx_neuloss <- which((neuloss_mono_masses == neuloss_mono_mass))
- local({
- len_neuloss <- length(idx_neuloss)
-
- if (len_neuloss > 1L)
- stop("Multiple matches to the `mono_mass` in `neuloss`.")
- else if (!len_neuloss)
- stop("No matches to the `mono_mass` in `neuloss`.")
- })
+ if ((len_neuloss <- length(idx_neuloss)) > 1L)
+ stop("Multiple matches to the `mono_mass` in `neuloss`.")
+ else if (!len_neuloss)
+ stop("No matches to the `mono_mass` in `neuloss`.")
this_neuloss <- nodes_neuloss[idx_neuloss]
xml2::xml_remove(this_neuloss)
@@ -1304,9 +1274,7 @@ remove_unimod_title <- function (title = NULL)
idx <- which(xml2::xml_attr(modifications, "title") == title)
- len <- length(idx)
-
- if (len > 1L)
+ if ((len <- length(idx)) > 1L)
stop("Multiple matches to `", title, "`.\n",
"Fix the redundancy from ", xml_file, ".")
else if (!len)
@@ -1378,12 +1346,12 @@ parse_unimod_composition <- function (composition = "H(4) C O S")
{
options(digits = 9L)
- df <- composition %>%
- stringr::str_replace_all("([:alnum:]+)$", paste0("\\1", "(1)")) %>%
- stringr::str_replace_all("([:alnum:]+) ", paste0("\\1", "(1) ")) %>%
- gsub("\\s+", "", .) %>%
- stringr::str_match_all("(.*?)\\((-*[0-9]+)\\)") %>%
- `[[`(1)
+ df <- composition |>
+ stringr::str_replace_all("([:alnum:]+)$", paste0("\\1", "(1)")) |>
+ stringr::str_replace_all("([:alnum:]+) ", paste0("\\1", "(1) "))
+ df <- gsub("\\s+", "", df)
+ df <- stringr::str_match_all(df, "(.*?)\\((-*[0-9]+)\\)")
+ df <- df[[1]]
df <- df[, -1, drop = FALSE]
colnames(df) <- c("symbol", "number")
diff --git a/inst/extdata/custom.xml b/inst/extdata/custom.xml
index 9946ed3..529bf7b 100644
--- a/inst/extdata/custom.xml
+++ b/inst/extdata/custom.xml
@@ -259,5 +259,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/man/calc_pepfdr.Rd b/man/calc_pepfdr.Rd
index 036e648..072eae0 100644
--- a/man/calc_pepfdr.Rd
+++ b/man/calc_pepfdr.Rd
@@ -9,7 +9,7 @@ calc_pepfdr(
fdr_type = "protein",
min_len = 7L,
max_len = 40L,
- ms1_offsets = 0,
+ is_notched = FALSE,
max_pepscores_co = 50,
min_pepscores_co = 0,
enzyme = "trypsin_p",
@@ -30,8 +30,7 @@ for considerations. Shorter peptides will be excluded. The default is 7.}
\item{max_len}{A positive integer; the maximum length of peptide sequences
for considerations. Longer peptides will be excluded. The default is 40.}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
+\item{is_notched}{Logical; is a search with MS1 notches or not.}
\item{max_pepscores_co}{A positive numeric; the upper limit in the cut-offs
of peptide scores for discriminating significant and insignificant
diff --git a/man/calc_peploc.Rd b/man/calc_peploc.Rd
index 0ef8fd1..1102908 100644
--- a/man/calc_peploc.Rd
+++ b/man/calc_peploc.Rd
@@ -8,7 +8,7 @@ calc_peploc(
x = NULL,
out_path = NULL,
mod_indexes = NULL,
- ms1_offsets = 0,
+ is_notched = FALSE,
locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)"),
topn_mods_per_seq = 3L,
topn_seqs_per_query = 3L
@@ -22,8 +22,7 @@ calc_peploc(
\item{mod_indexes}{Integer; the indexes of fixed and/or variable
modifications.}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
+\item{is_notched}{Logical; is a search with MS1 notches or not.}
\item{locmods}{Among \code{varmods} for the consideration of localization
probabilities; for instance, \code{locmods = NULL} for nothing,
diff --git a/man/calc_pepscores.Rd b/man/calc_pepscores.Rd
index cb98b2b..2d0970c 100644
--- a/man/calc_pepscores.Rd
+++ b/man/calc_pepscores.Rd
@@ -16,7 +16,6 @@ calc_pepscores(
min_ms2mass = 115L,
index_mgf_ms2 = FALSE,
tally_ms2ints = TRUE,
- ms1_offsets = 0,
mgf_path,
maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L,
@@ -90,9 +89,6 @@ interrogation. The default is 110.}
\item{tally_ms2ints}{Logical; tally MS2 intensities or not.}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
-
\item{mgf_path}{A file path to a list of MGF files. The experimenter needs to
supply the files.
diff --git a/man/calcpepsc.Rd b/man/calcpepsc.Rd
index b9ddc52..ef832cd 100644
--- a/man/calcpepsc.Rd
+++ b/man/calcpepsc.Rd
@@ -18,7 +18,6 @@ calcpepsc(
d2 = 1e-05,
index_mgf_ms2 = FALSE,
tally_ms2ints = TRUE,
- ms1_offsets = 0,
add_ms2theos = FALSE,
add_ms2theos2 = FALSE,
add_ms2moverzs = FALSE,
@@ -79,9 +78,6 @@ interrogation. The default is 110.}
\item{tally_ms2ints}{Logical; tally MS2 intensities or not.}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
-
\item{add_ms2theos}{Logical. If true, adds the sequence of primary
theoretical MS2 m/z values (\code{pep_ms2_theos}). The sequence order at a
given \code{type_ms2ions} is:
diff --git a/man/calib_mgf.Rd b/man/calib_mgf.Rd
index c7b5e90..0d9fb52 100644
--- a/man/calib_mgf.Rd
+++ b/man/calib_mgf.Rd
@@ -39,7 +39,7 @@ calib_mgf(
min_len = 7L,
max_len = 40L,
max_miss = 2L,
- knots = 50L
+ knots = 5L
)
}
\arguments{
diff --git a/man/calib_ms1.Rd b/man/calib_ms1.Rd
index c8ddea7..da9fbd6 100644
--- a/man/calib_ms1.Rd
+++ b/man/calib_ms1.Rd
@@ -12,7 +12,7 @@ calib_ms1(
ppm_ms1 = 20L,
min_mass = 200L,
max_mass = 4500L,
- knots = 50L
+ knots = 5L
)
}
\arguments{
diff --git a/man/check_locmods.Rd b/man/check_locmods.Rd
index 75688a0..297e366 100644
--- a/man/check_locmods.Rd
+++ b/man/check_locmods.Rd
@@ -4,9 +4,20 @@
\alias{check_locmods}
\title{Checks \code{locmods}}
\usage{
-check_locmods(fixedmods, varmods, locmods)
+check_locmods(locmods, fixedmods, varmods, ms1_neulosses = NULL)
}
\arguments{
+\item{locmods}{Among \code{varmods} for the consideration of localization
+ probabilities; for instance, \code{locmods = NULL} for nothing,
+ \code{locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)")} for
+ phosphopeptides, \code{locmods = "Acetyl (K)"} for lysine acetylation.
+ \code{fixedmods} that were coerced to \code{varmods} will be added
+ automatically to \code{locmods}.
+
+ For convenience, the default is set to look for applicable peptide
+ phosphorylation (and may encounter warning messages if the data type is
+ different to the default).}
+
\item{fixedmods}{Character string(s) of fixed modifications.}
\item{varmods}{Character string(s) of variable modifications. Multiple
@@ -20,16 +31,13 @@ check_locmods(fixedmods, varmods, locmods)
\link{parse_unimod} for grammars of modification \code{title},
\code{position} and \code{site}.}
-\item{locmods}{Among \code{varmods} for the consideration of localization
- probabilities; for instance, \code{locmods = NULL} for nothing,
- \code{locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)")} for
- phosphopeptides, \code{locmods = "Acetyl (K)"} for lysine acetylation.
- \code{fixedmods} that were coerced to \code{varmods} will be added
- automatically to \code{locmods}.
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
- For convenience, the default is set to look for applicable peptide
- phosphorylation (and may encounter warning messages if the data type is
- different to the default).}
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
}
\description{
Coerced \code{fixedmods} not considered.
diff --git a/man/check_notches.Rd b/man/check_notches.Rd
new file mode 100644
index 0000000..fee4952
--- /dev/null
+++ b/man/check_notches.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msmsmatches.R
+\name{check_notches}
+\alias{check_notches}
+\title{Checks the compatibility between ms1_notches and ms1_neulosses.}
+\usage{
+check_notches(ms1_notches, ms1_neulosses)
+}
+\arguments{
+\item{ms1_notches}{A numeric vector; notches (off-sets) in precursor masses,
+e.g., \code{c(-97.976896)} to account fo the loss of a phosphoric acid in
+precursor masses.}
+
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
+
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
+}
+\description{
+Checks the compatibility between ms1_notches and ms1_neulosses.
+}
diff --git a/man/comb_ms1_offsets.Rd b/man/comb_ms1_offsets.Rd
new file mode 100644
index 0000000..297b5d9
--- /dev/null
+++ b/man/comb_ms1_offsets.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msmsmatches2.R
+\name{comb_ms1_offsets}
+\alias{comb_ms1_offsets}
+\title{Combines off-sets in precursor masses (notches and neutral losses).}
+\usage{
+comb_ms1_offsets(ms1_offsets = 0, ms1_neulosses = NULL)
+}
+\arguments{
+\item{ms1_offsets}{Precursor mass off-sets (notches, not neutral losses).}
+
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
+
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
+}
+\value{
+A vector of mass off-sets.
+}
+\description{
+The return contains no information of Unimod titles and positions since it is
+only used for pairing with experimental MGF data.
+}
diff --git a/man/combine_fracs.Rd b/man/combine_fracs.Rd
index 8ac891a..f858fd8 100644
--- a/man/combine_fracs.Rd
+++ b/man/combine_fracs.Rd
@@ -4,17 +4,16 @@
\alias{combine_fracs}
\title{Combines results from fractions}
\usage{
-combine_fracs(files, tempdir, sc_path, ms1_offsets = 0)
+combine_fracs(files, tempdir, sc_path, is_notched = FALSE)
}
\arguments{
-\item{files}{A list of file names}
+\item{files}{A list of file names.}
-\item{tempdir}{A temporary directory containing the files}
+\item{tempdir}{A temporary directory containing the files.}
\item{sc_path}{An output path}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
+\item{is_notched}{Logical; is a search with MS1 notches or not.}
}
\description{
Combines results from fractions
diff --git a/man/find_ms1_offsets.Rd b/man/find_ms1_offsets.Rd
index 295967b..6222cd4 100644
--- a/man/find_ms1_offsets.Rd
+++ b/man/find_ms1_offsets.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/msmsmatches2.R
\name{find_ms1_offsets}
\alias{find_ms1_offsets}
-\title{Finds offsets in precursor masses.}
+\title{Finds off-sets in precursor masses.}
\usage{
find_ms1_offsets(n_13c = 0L, ms1_notches = 0)
}
@@ -11,9 +11,12 @@ find_ms1_offsets(n_13c = 0L, ms1_notches = 0)
the range of \code{-1:2}. The default is 0.}
\item{ms1_notches}{A numeric vector; notches (off-sets) in precursor masses,
-e.g., \code{c(-79.966331, -97.976896)} to account fo the loss of a phospho
-group and phosphoric acid in precursor masses.}
+e.g., \code{c(-97.976896)} to account fo the loss of a phosphoric acid in
+precursor masses.}
+}
+\value{
+A vector of mass off-sets.
}
\description{
-Finds offsets in precursor masses.
+Finds off-sets in precursor masses.
}
diff --git a/man/find_optbs.Rd b/man/find_optbs.Rd
new file mode 100644
index 0000000..e309624
--- /dev/null
+++ b/man/find_optbs.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msmsmatches2.R
+\name{find_optbs}
+\alias{find_optbs}
+\title{Finds the optimal natural spline.}
+\usage{
+find_optbs(diff_ms1, ret_time, knots = 50L)
+}
+\arguments{
+\item{diff_ms1}{The differences between experimental and theoretical
+precursor masses.}
+
+\item{ret_time}{Retention timm.}
+
+\item{knots}{The number of knots.}
+}
+\description{
+Finds the optimal natural spline.
+}
diff --git a/man/find_optns.Rd b/man/find_optns.Rd
new file mode 100644
index 0000000..b356e8f
--- /dev/null
+++ b/man/find_optns.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msmsmatches2.R
+\name{find_optns}
+\alias{find_optns}
+\title{Finds the optimal natural spline.}
+\usage{
+find_optns(diff_ms1, ret_time, knots = 50L)
+}
+\arguments{
+\item{diff_ms1}{The differences between experimental and theoretical
+precursor masses.}
+
+\item{ret_time}{Retention timm.}
+
+\item{knots}{The number of knots.}
+}
+\description{
+Finds the optimal natural spline.
+}
diff --git a/man/hadd_primatches.Rd b/man/hadd_primatches.Rd
index 5ba3f4b..7df0f78 100644
--- a/man/hadd_primatches.Rd
+++ b/man/hadd_primatches.Rd
@@ -6,7 +6,7 @@
\usage{
hadd_primatches(
out_path = NULL,
- ms1_offsets = 0,
+ is_notched = FALSE,
add_ms2theos = FALSE,
add_ms2theos2 = FALSE,
add_ms2moverzs = FALSE,
@@ -18,8 +18,7 @@ hadd_primatches(
\arguments{
\item{out_path}{A file path of outputs.}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
+\item{is_notched}{Logical; is a search with MS1 notches or not.}
\item{add_ms2theos}{Logical. If true, adds the sequence of primary
theoretical MS2 m/z values (\code{pep_ms2_theos}). The sequence order at a
diff --git a/man/hcalc_tmtint.Rd b/man/hcalc_tmtint.Rd
index 10914d0..2bda578 100644
--- a/man/hcalc_tmtint.Rd
+++ b/man/hcalc_tmtint.Rd
@@ -9,7 +9,6 @@ hcalc_tmtint(
quant = "tmt10",
ppm_reporters = 10L,
idx = 1L,
- ms1_offsets = 0,
out_path = NULL
)
}
@@ -30,9 +29,6 @@ ions. The default is 10.}
\item{idx}{The i-th chunk}
-\item{ms1_offsets}{Off-sets in precursor masses in relative to the original
-values in MGFs.}
-
\item{out_path}{A file path of outputs.}
}
\description{
diff --git a/man/hms2match.Rd b/man/hms2match.Rd
index 7b5ca1f..9fa6721 100644
--- a/man/hms2match.Rd
+++ b/man/hms2match.Rd
@@ -9,6 +9,9 @@ hms2match(
funs_ms2,
ms1vmods_all,
ms2vmods_all,
+ ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 1L,
+ maxn_neulosses_vnl = 1L,
mod_indexes,
mgf_path,
out_path,
@@ -39,6 +42,26 @@ correspondence to \code{aa_masses_all}}
\item{ms2vmods_all}{All possible labels of MS2 variable modifications in
correspondence to \code{aa_masses_all}}
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
+
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
+
+\item{maxn_neulosses_fnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of fixed MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
+\item{maxn_neulosses_vnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of variable MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
\item{mod_indexes}{Integer; the indexes of fixed and/or variable
modifications}
diff --git a/man/hms2match_one.Rd b/man/hms2match_one.Rd
index 0a30a3a..e39e3de 100644
--- a/man/hms2match_one.Rd
+++ b/man/hms2match_one.Rd
@@ -6,12 +6,16 @@
\usage{
hms2match_one(
pep_mod_group,
- mgths,
+ nms_theo,
+ nms_expt,
aa_masses,
FUN,
ms1vmods,
ms2vmods,
cl,
+ ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 1L,
+ maxn_neulosses_vnl = 1L,
mod_indexes,
mgf_path,
out_path,
@@ -32,7 +36,9 @@ hms2match_one(
\arguments{
\item{pep_mod_group}{The index of peptide modification groups.}
-\item{mgths}{Pairs of experimental and theoretical data.}
+\item{nms_theo}{Names of theoretical files.}
+
+\item{nms_expt}{Names of experimental files.}
\item{aa_masses}{An amino-acid look-up.}
@@ -47,6 +53,26 @@ an i-t \code{aa_masses}.}
\item{cl}{The value of clusters for parallel processes.}
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
+
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
+
+\item{maxn_neulosses_fnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of fixed MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
+\item{maxn_neulosses_vnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of variable MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
\item{mod_indexes}{Integer; the indexes of fixed and/or variable
modifications}
diff --git a/man/matchMS.Rd b/man/matchMS.Rd
index 72de881..956ef59 100644
--- a/man/matchMS.Rd
+++ b/man/matchMS.Rd
@@ -15,6 +15,9 @@ matchMS(
varmods = c("Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)",
"Gln->pyro-Glu (N-term = Q)"),
rm_dup_term_anywhere = TRUE,
+ ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 2L,
+ maxn_neulosses_vnl = 2L,
fixedlabs = NULL,
varlabs = NULL,
locmods = c("Phospho (S)", "Phospho (T)", "Phospho (Y)"),
@@ -144,6 +147,26 @@ c("uniprot_acc", "uniprot_id", "refseq_acc", "other"). See also
variable modifications with site(s) in positions of both terminal and
anywhere, e.g., "Gln->pyro-Glu (N-term = Q)" and "Deamidated (Q).}
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
+
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
+
+\item{maxn_neulosses_fnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of fixed MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
+\item{maxn_neulosses_vnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of variable MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
\item{fixedlabs}{Character string(s) of fixed isotopic labels. See examples
of SILAC for details. Can be but not typically used in standard alone
searches of labeled residues.}
@@ -271,8 +294,8 @@ default is 20.}
the range of \code{-1:2}. The default is 0.}
\item{ms1_notches}{A numeric vector; notches (off-sets) in precursor masses,
-e.g., \code{c(-79.966331, -97.976896)} to account fo the loss of a phospho
-group and phosphoric acid in precursor masses.}
+e.g., \code{c(-97.976896)} to account fo the loss of a phosphoric acid in
+precursor masses.}
\item{par_groups}{A low-priority feature. Parameter(s) of \code{matchMS}
multiplied by sets of values in groups. Multiple searches will be performed
@@ -671,9 +694,9 @@ matchMS(
matchMS(
fixedmods = c("TMTpro (N-term)", "TMTpro (K)", "Carbamidomethyl (C)"),
varmods = c("Acetyl (Protein N-term)", "Oxidation (M)",
- "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)"),
+ "Deamidated (N)", "Deamidated (Q)",
+ "Gln->pyro-Glu (N-term = Q)"),
quant = "tmt18",
- fdr_type = "psm",
out_path = "~/mzion/examples",
)
diff --git a/man/mprepBrukerMGF.Rd b/man/mprepBrukerMGF.Rd
index 0c62269..56ee95c 100644
--- a/man/mprepBrukerMGF.Rd
+++ b/man/mprepBrukerMGF.Rd
@@ -4,7 +4,7 @@
\alias{mprepBrukerMGF}
\title{Parallel \link{prepBrukerMGF}}
\usage{
-mprepBrukerMGF(filepath, n_cores = 48L)
+mprepBrukerMGF(filepath, n_cores = 32L)
}
\arguments{
\item{filepath}{A file path to MGF.}
diff --git a/man/ms2match.Rd b/man/ms2match.Rd
index edf591f..af5436e 100644
--- a/man/ms2match.Rd
+++ b/man/ms2match.Rd
@@ -13,8 +13,8 @@ ms2match(
type_ms2ions = "by",
maxn_vmods_per_pep = 5L,
maxn_sites_per_vmod = 3L,
- maxn_fnl_per_seq = 64L,
- maxn_vnl_per_seq = 64L,
+ maxn_fnl_per_seq = 3L,
+ maxn_vnl_per_seq = 3L,
maxn_vmods_sitescombi_per_pep = 64L,
minn_ms2 = 6L,
ppm_ms1 = 20L,
@@ -27,6 +27,9 @@ ms2match(
by_modules = TRUE,
reframe_mgfs = FALSE,
ms1_offsets = 0,
+ ms1_neulosses = NULL,
+ maxn_neulosses_fnl = 1L,
+ maxn_neulosses_vnl = 1L,
fasta,
acc_type,
acc_pattern,
@@ -140,6 +143,26 @@ FALSE, search all modules together. The later would probably need more than
\item{ms1_offsets}{The MS1 off-sets.}
+\item{ms1_neulosses}{Character string(s) specifying the neutral losses of
+ precursors. The nomenclature is the same as those in argument
+ \code{varmods}, e.g., \code{c("Phospho (S)", "Phospho (T)", "Phospho
+ (Y)")}.
+
+ The argument is a simplified (narrower) usage of argument
+ \code{ms1_notches} with additional specificity in modifications and sites.}
+
+\item{maxn_neulosses_fnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of fixed MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
+\item{maxn_neulosses_vnl}{A positive integer used in conjunction with
+arguments \code{ms1_notches} and \code{ms1_neulosses}. The maximum number
+of variable MS2 neutral losses to be considered when searching against peak
+lists with MS1 mass off-sets. The default is two (one MS2 neutral loss in
+addition to 0).}
+
\item{fasta}{Character string(s) to the name(s) of fasta file(s) with
prepended directory path. The experimenter needs to supply the files.}
diff --git a/man/ms2match_one.Rd b/man/ms2match_one.Rd
index 2e49a4e..680508e 100644
--- a/man/ms2match_one.Rd
+++ b/man/ms2match_one.Rd
@@ -2,10 +2,11 @@
% Please edit documentation in R/ms2frames.R
\name{ms2match_one}
\alias{ms2match_one}
-\title{Matches experimentals and theoreticals}
+\title{Matches experimentals and theoreticals.}
\usage{
ms2match_one(
- mgth,
+ nms_theo,
+ nms_expt,
pep_mod_group,
aa_masses,
FUN,
@@ -30,7 +31,9 @@ ms2match_one(
)
}
\arguments{
-\item{mgth}{MGF and theoretical pairs}
+\item{nms_theo}{Names of theoretical files.}
+
+\item{nms_expt}{Names of experimental files.}
\item{pep_mod_group}{The index of peptide modification groups}
@@ -126,5 +129,5 @@ interrogation. The default is 110.}
\item{df0}{An output template}
}
\description{
-For a single module
+For a single module and single notch.
}
diff --git a/man/order_fracs3.Rd b/man/order_fracs3.Rd
new file mode 100644
index 0000000..23d228e
--- /dev/null
+++ b/man/order_fracs3.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/scores.R
+\name{order_fracs3}
+\alias{order_fracs3}
+\title{Order fractions}
+\usage{
+order_fracs3(type = "list_table", tempdir, by_modules = TRUE)
+}
+\arguments{
+\item{type}{The type of files.}
+
+\item{tempdir}{A temporary directory containing the files.}
+
+\item{by_modules}{Logical; if TRUE, performs searches by modules.}
+}
+\description{
+Three layers: module_notch_frac
+}
diff --git a/man/post_calib.Rd b/man/post_calib.Rd
new file mode 100644
index 0000000..5bc9097
--- /dev/null
+++ b/man/post_calib.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/msmsmatches2.R
+\name{post_calib}
+\alias{post_calib}
+\title{Post MS1 calibrations.}
+\usage{
+post_calib(mgfs, min_mass, max_mass, mgf_path, filename)
+}
+\arguments{
+\item{mgfs}{MGF data.}
+
+\item{min_mass}{A positive integer; the minimum precursor mass for
+interrogation. The default is an arbitrarily low value (the primary guard
+against low molecular-weight precursors is \code{min_len}).}
+
+\item{max_mass}{A positive integer; the maximum precursor mass for
+interrogation.}
+
+\item{mgf_path}{A file path to a list of MGF files. The experimenter needs to
+ supply the files.
+
+ The supported MGFs are in the formats of (1) MSConvert against \code{.raw}
+ from Thermo's Orbitrap or \code{.d} from Bruker's timsTOF Pro, (2) Thermo's
+ Proteome Discoverer or (3) Bruker's DataAnalysis.
+
+ With MSConvert, the default \code{titleMaker} is required for correct
+ parsing (don't think it can be altered by users, but just in case).}
+
+\item{filename}{An MGF file name}
+}
+\description{
+Post MS1 calibrations.
+}
diff --git a/man/prep_pepfdr_td.Rd b/man/prep_pepfdr_td.Rd
index 71faa51..ad37b02 100644
--- a/man/prep_pepfdr_td.Rd
+++ b/man/prep_pepfdr_td.Rd
@@ -10,7 +10,7 @@ prep_pepfdr_td(
enzyme = "trypsin_p",
nes_fdr_group = "base",
fdr_group = "base",
- ms1_offsets = 0,
+ is_notched = FALSE,
only_notch_zero = TRUE
)
}
@@ -39,8 +39,9 @@ peptide FDR controls. The value is in one of \code{c("all", "base")}. The
\code{base} corresponds to the modification group with the largest number
of matches.}
-\item{only_notch_zero}{Logical; if TRUE, use only data at \code{ms1_offsets =
-0}.}
+\item{is_notched}{Logical; is a search with MS1 notches or not.}
+
+\item{only_notch_zero}{Logical; if TRUE, use only data at the zero notch.}
}
\description{
Prepares target-decoy data.
diff --git a/man/table_unimods.Rd b/man/table_unimods.Rd
index 2a90ee3..bf5a492 100644
--- a/man/table_unimods.Rd
+++ b/man/table_unimods.Rd
@@ -10,8 +10,7 @@ table_unimods(out_nm = "~/mzion/unimods.txt")
\item{out_nm}{A name to outputs.}
}
\description{
-For convenience summary of the \code{title}, \code{site} and
-\code{position}.
+For convenience summary of the \code{title}, \code{site} and \code{position}.
}
\examples{
\donttest{
@@ -19,7 +18,7 @@ library(mzion)
ans <- table_unimods()
-## TMT-6, -10 and -11 plexes
+## TMT-6, -10 and -11 plexes
# share the same Unimod entry at title "TMT6plex"
# (the same chemistry at tag mass 229.162932 Da)
ans[with(ans, title == "TMT6plex"), ]
@@ -50,5 +49,7 @@ this_mod <- parse_unimod("Gln->pryo-Glu (N-term = Q)")
}
}
\seealso{
-\link{find_unimod}, \link{parse_unimod}.
+\link{find_unimod}, \link{parse_unimod}, \link{add_unimod},
+ \link{remove_unimod}, \link{remove_unimod_title},
+ \link{calc_unimod_compmass}.
}