diff --git a/DESCRIPTION b/DESCRIPTION index 8ac8f5a..af54545 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mzion Type: Package Title: Database Searches of Proteomic Mass-spectrometrirc Data -Version: 1.2.6.2 +Version: 1.2.6.3 Authors@R: person(given = "Qiang", family = "Zhang", diff --git a/R/ms2_gen.R b/R/ms2_gen.R index e6ec1a3..20d1418 100644 --- a/R/ms2_gen.R +++ b/R/ms2_gen.R @@ -555,7 +555,7 @@ calc_ms2ions_a1_vnl0_fnl0 <- function (M, P, aam, aa_masses, ntmass, ctmass, out <- vector("list", nvm) naa <- length(aam) - hex0 <- rep_len("0", naa) + hx0 <- rep_len("0", naa) for (i in 1:nvm) { vi <- P[i, ] @@ -563,7 +563,7 @@ calc_ms2ions_a1_vnl0_fnl0 <- function (M, P, aam, aa_masses, ntmass, ctmass, aam_i[vi] <- aam_i[vi] + ds out[[i]] <- ms2ions_by_type(aam_i, ntmass, ctmass, type_ms2ions, digits) - h <- hex0 + h <- hx0 h[vi] <- mod_indexes[M] names(out)[i] <- .Internal(paste0(list(h), collapse = "", recycle0 = FALSE)) } @@ -848,7 +848,8 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, } } else { - fnl_combi <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq) + fnl_combi <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq, + use.names = FALSE) } # most likely a list-one @@ -905,7 +906,7 @@ calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, fnl_idxes, r <- 1L naa <- length(aam) - hex0 <- rep_len("0", naa) + hx0 <- rep_len("0", naa) for (i in 1:nvm) { vi <- P[i, ] @@ -926,7 +927,7 @@ calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, fnl_idxes, } } - h <- hex0 + h <- hx0 h[vi] <- mod_indexes[M] h <- .Internal(paste0(list(h), collapse = "", recycle0 = FALSE)) @@ -1213,7 +1214,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, mod_indexes = mod_indexes, digits = digits) else af <- calc_ms2ions_a1_vnl1_fnl0( - N = expand_grid_rows(vmods_nl[ms2vmods], nmax = nnl), + N = expand_grid_rows(vmods_nl[ms2vmods], nmax = nnl, use.names = FALSE), M = M, P = P, aam = aam, @@ -1243,8 +1244,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, mod_indexes = mod_indexes, digits = digits) } else { - M <- split_matrix(M, by = "row") - af <- lapply(M, calc_ms2ions_a1_vnl0_fnl0, + af <- lapply(split_matrix(M, by = "row"), calc_ms2ions_a1_vnl0_fnl0, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, mod_indexes = mod_indexes, digits = digits) @@ -1255,7 +1255,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, M <- split_matrix(M, by = "row") l <- maxn_vmods_sitescombi_per_pep %/% n1 - if (l == 1L) { + if (l <= 1L) { af <- lapply(M, calc_ms2ions_a1_vnl0_fnl0, P = P, aam = aam, aa_masses = aa_masses, ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions, @@ -1266,9 +1266,11 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, n2 <- n1 * prod(lengths(vmods_nl)) N <- if (n2 > maxn_vmods_sitescombi_per_pep) - lapply(M, function (x) expand_grid_rows(vmods_nl[x], nmax = 2L)) + lapply(M, function (x) + expand_grid_rows(vmods_nl[x], nmax = 2L, use.names = FALSE)) else - lapply(M, function (x) expand_grid_rows(vmods_nl[x], nmax = maxn_vnl_per_seq)) + lapply(M, function (x) + expand_grid_rows(vmods_nl[x], nmax = maxn_vnl_per_seq, use.names = FALSE)) af <- mapply( calc_ms2ions_a1_vnl1_fnl0, @@ -1316,16 +1318,14 @@ calc_ms2ions_a1_vnl1_fnl0 <- function (N, M, P, aam, aa_masses, type_ms2ions = "by", mod_indexes, digits = 4L) { - naa <- length(aam) - hex0 <- rep_len("0", naa) - ds <- aa_masses[M] nnl <- length(N) nvm <- nrow(P) len <- nvm * nnl out <- vector("list", len) - - r <- 1L + naa <- length(aam) + hx0 <- rep_len("0", naa) + r <- 1L for (i in 1:nvm) { vi <- P[i, ] @@ -1341,7 +1341,7 @@ calc_ms2ions_a1_vnl1_fnl0 <- function (N, M, P, aam, aa_masses, } # both i and j must exist - h <- hex0 + h <- hx0 h[vi] <- mod_indexes[M] h <- .Internal(paste0(list(h), collapse = "", recycle0 = FALSE)) diff --git a/R/ms2frames.R b/R/ms2frames.R index 6150625..9aec5ef 100644 --- a/R/ms2frames.R +++ b/R/ms2frames.R @@ -1036,8 +1036,8 @@ fuzzy_match_one2 <- function (x, y) #' #' @return Lists of (1) theo, (2) expt, (3) ith, (4) iex and (5) m. find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL, - ppm_ms2 = 10L, min_ms2mass = 115L, minn_ms2 = 6L, - index_mgf_ms2 = FALSE) + ppm_ms2 = 10L, min_ms2mass = 115L, minn_ms2 = 6L, + index_mgf_ms2 = FALSE) { ############################################################################## # `theos` @@ -1050,94 +1050,82 @@ find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL, # and never get matched. # # ex: `expts` in integers - # th_i: the i-th `theos` in integers + # thi: the i-th `theos` in integers # # ex has no duplicated entries; - # th_i can. + # thi can. # # Forward matching: match(theos, expts) # (i) allowed, e.g., b4- and y5 theo ions matched to the same ex value: # match(c(2,2,3,4), c(1:2, 5:10)) - # (ii) multiple ex' value's to the same th_i value not allowed; + # (ii) multiple ex' value's to the same thi value not allowed; # otherwise longer length `c(expts[bps], expts[yps])` than lhs. - # e.g. ex's 74953, 74955 both fit to th_i 74954 and the best one is applied. - # (after a th_i is matched, it will be removed from further matching) + # e.g. ex's 74953, 74955 both fit to thi 74954 and the best one is applied. + # (after a thi is matched, it will be removed from further matching) # # Backward matching: match(expts, theos) - # (i) %in% and %fin% only shows the first match for duplicated entries th_i: + # (i) %in% and %fin% only shows the first match for duplicated entries thi: # match(1:4, c(1, 2, 2, 5)) - # (so no worry about th_i duplication) + # (so no worry about thi duplication) ############################################################################## + nullout <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL) len <- length(theos) if (!len) - return(list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL)) + return(nullout) - # --- out <- vector("list", len) - ## forward matches if (len > 3L) { mths <- index_mz(.Internal(unlist(theos, recursive = FALSE, use.names = FALSE)), min_ms2mass, d) - pss <- mths %fin% ex | (mths - 1L) %fin% ex | (mths + 1L) %fin% ex - pss <- fold_vec(pss, len) - mths <- fold_vec(mths, len) - ipss <- lapply(pss, function (x) .Internal(which(x))) + tines <- mths %fin% ex | (mths - 1L) %fin% ex | (mths + 1L) %fin% ex + tines <- fold_vec(tines, len) + mths <- fold_vec(mths, len) + iths <- lapply(tines, function (x) .Internal(which(x))) for (i in 1:len) { - theos_i <- theos[[i]] - th_i <- mths[[i]] - ps <- pss[[i]] - ips <- ipss[[i]] - - ### the remaining are the same as those under "else" ### + theoi <- theos[[i]] + thi <- mths[[i]] + tine <- tines[[i]] + ith <- iths[[i]] + nth <- length(ith) - ## backward matches - # expts are in ascending orders, but theos in b1, b2, ... , y1, y2, ... - # separate matches to theos_b and theos_y, each are in ascending order - n_ps <- length(ips) - - if(n_ps >= minn_ms2) { - # separated b and y matches (to handled double-dipping between b and y) - # (adj: bps <- fuzzy_match_one2(ex, th_i[1:mid])) - lth <- length(ps) - mid <- lth/2L - - # experimental es initially filled by NA and matched theoretical values, - # and at the end replaced with matched experimental values - es <- theos_i - es[!ps] <- NA_real_ + if (nth >= minn_ms2) { + es <- theoi + es[!tine] <- NA_real_ - ex_bf <- ex - 1L - ex_af <- ex + 1L + thok <- thi[ith] + tine2 <- fastmatch::fmatch(c(thok, thok - 1L, thok + 1L), ex) # 2us + iex <- vector("integer", nth) - # b-ions - y_1 <- th_i[1:mid] - ps_1 <- ex %fin% y_1 | ex_bf %fin% y_1 | ex_af %fin% y_1 - ips_1 <- .Internal(which(ps_1)) - - # y-ions - y_2 <- th_i[(mid+1L):lth] - ps_2 <- ex %fin% y_2 | ex_bf %fin% y_2 | ex_af %fin% y_2 - ips_2 <- .Internal(which(ps_2)) + for (j in 1:nth) { + x <- tine2[[j]] + + if (is.na(x)) { + y <- tine2[[j+nth]] + + if (is.na(y)) { + iex[[j]] <- tine2[[j+nth*2]] + } else { + iex[[j]] <- y + } + } else { + iex[[j]] <- x + } + } - # b- and y-ions - expt_1 <- expts[ips_1] - expt_2 <- expts[ips_2] - expt_12 <- c(expt_1, expt_2) - ips_12 <- c(ips_1, ips_2) - len_12 <- length(expt_12) + nex <- length(iex) - # (occur rarely; OK to recalculate freshly `expt_12`) - if (n_ps != len_12) { + # should not occur? + if (nth != nex) { # "* 2" for three-frame searches - # also ensure that "ith = ips" in ascending order, not "iex = ips_12" - out_i <- find_ppm_outer_bycombi(expts, theos_i, ppm_ms2 * 2L) + # also ensure that "ith = ith" in ascending order, not "iex = iex" + out_i <- find_ppm_outer_bycombi(expts, theoi, ppm_ms2 * 2L) if (sum(!is.na(out_i[["expt"]])) < minn_ms2) { - out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL) + out[[i]] <- nullout next } @@ -1145,92 +1133,45 @@ find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL, next } - es[ps] <- expt_12 - - out[[i]] <- list(theo = theos_i, expt = es, ith = ips, iex = ips_12, m = len_12) + es[tine] <- expts[iex] + out[[i]] <- list(theo = theoi, expt = es, ith = ith, iex = iex, m = nex) } else - out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL) + out[[i]] <- nullout } } else { - # mths <- lapply(theos, index_mz, min_ms2mass, d) - # pss <- lapply(mths, function (x) x %fin% ex | (x - 1L) %fin% ex | (x + 1L) %fin% ex) - # ipss <- lapply(pss, function (x) .Internal(which(x))) - for (i in 1:len) { - ## forward matches - theos_i <- theos[[i]] - th_i <- index_mz(theos_i, min_ms2mass, d) - ps <- th_i %fin% ex | (th_i - 1L) %fin% ex | (th_i + 1L) %fin% ex - ips <- .Internal(which(ps)) - - ## "ith = ips" in ascending order, not "iex = ips_12" - - ### the remaining are the same under "if" ### + theoi <- theos[[i]] + thi <- index_mz(theoi, min_ms2mass, d) + t2e <- fastmatch::fmatch(c(thi, thi - 1L, thi + 1L), ex, nomatch = 0L) + nth <- sum(t2e > 0L) - ## backward matches - # expts are in ascending orders, but theos in b1, b2, ... , y1, y2, ... - # separate matches to theos_b and theos_y, each are in ascending order - n_ps <- length(ips) - - if(n_ps >= minn_ms2) { - # separated b and y matches (to handled double-dipping between b and y) - # (adj: bps <- fuzzy_match_one2(ex, th_i[1:mid])) - lth <- length(ps) - mid <- lth/2L - - # experimental es initially filled by NA and matched theoretical values, - # and at the end replaced with matched experimental values - es <- theos_i - es[!ps] <- NA_real_ - - ex_bf <- ex - 1L - ex_af <- ex + 1L - - # b-ions - y_1 <- th_i[1:mid] - ps_1 <- ex %fin% y_1 | ex_bf %fin% y_1 | ex_af %fin% y_1 - ips_1 <- .Internal(which(ps_1)) + if (nth >= minn_ms2) { + l <- length(thi) + mi <- t2e[1:l] + bf <- t2e[(l + 1L):(l + l)] + af <- t2e[(l + l + 1L):(l + l + l)] - # y-ions - y_2 <- th_i[(mid+1L):lth] - ps_2 <- ex %fin% y_2 | ex_bf %fin% y_2 | ex_af %fin% y_2 - ips_2 <- .Internal(which(ps_2)) + okmi <- mi > 0L + okbf <- bf > 0L + okaf <- af > 0L - # b- and y-ions - expt_1 <- expts[ips_1] - expt_2 <- expts[ips_2] - expt_12 <- c(expt_1, expt_2) - ips_12 <- c(ips_1, ips_2) - len_12 <- length(expt_12) + ith <- c(.Internal(which(okmi)), .Internal(which(okbf)), .Internal(which(okaf))) + iex <- c(mi[okmi], bf[okbf], af[okaf]) - # (occur rarely; OK to recalculate freshly `expt_12`) - if (n_ps != len_12) { - # "* 2" for three-frame searches - # also ensure that "ith = ips" in ascending order, not "iex = ips_12" - out_i <- find_ppm_outer_bycombi(expts, theos_i, ppm_ms2 * 2L) - - if (sum(!is.na(out_i[["expt"]])) < minn_ms2) { - out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL) - next - } - - out[[i]] <- out_i - next - } - - es[ps] <- expt_12 - - out[[i]] <- list(theo = theos_i, expt = es, ith = ips, iex = ips_12, m = len_12) + es <- rep_len(NA_real_, l) + es[ith] <- expts[iex] + names(es) <- names(theoi) + out[[i]] <- list(theo = theoi, expt = es, ith = ith, iex = iex, m = nth) } - else - out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL) + else { + out[[i]] <- nullout + } } } names(out) <- names(theos) - out }