Skip to content

Commit

Permalink
v1.2.6.3
Browse files Browse the repository at this point in the history
Codes optimization for matching theoretical and experimental MS2 features.
  • Loading branch information
qzhang503 committed Jun 4, 2023
1 parent 2bb14b5 commit b2bcd7d
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 148 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mzion
Type: Package
Title: Database Searches of Proteomic Mass-spectrometrirc Data
Version: 1.2.6.2
Version: 1.2.6.3
Authors@R:
person(given = "Qiang",
family = "Zhang",
Expand Down
34 changes: 17 additions & 17 deletions R/ms2_gen.R
Original file line number Diff line number Diff line change
Expand Up @@ -555,15 +555,15 @@ calc_ms2ions_a1_vnl0_fnl0 <- function (M, P, aam, aa_masses, ntmass, ctmass,
out <- vector("list", nvm)

naa <- length(aam)
hex0 <- rep_len("0", naa)
hx0 <- rep_len("0", naa)

for (i in 1:nvm) {
vi <- P[i, ]
aam_i <- aam
aam_i[vi] <- aam_i[vi] + ds
out[[i]] <- ms2ions_by_type(aam_i, ntmass, ctmass, type_ms2ions, digits)

h <- hex0
h <- hx0
h[vi] <- mod_indexes[M]
names(out)[i] <- .Internal(paste0(list(h), collapse = "", recycle0 = FALSE))
}
Expand Down Expand Up @@ -848,7 +848,8 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL,
}
}
else {
fnl_combi <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq)
fnl_combi <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq,
use.names = FALSE)
}

# most likely a list-one
Expand Down Expand Up @@ -905,7 +906,7 @@ calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, fnl_idxes,
r <- 1L

naa <- length(aam)
hex0 <- rep_len("0", naa)
hx0 <- rep_len("0", naa)

for (i in 1:nvm) {
vi <- P[i, ]
Expand All @@ -926,7 +927,7 @@ calc_ms2ions_a1_vnl0_fnl1 <- function (M, P, fnl_combi, fnl_idxes,
}
}

h <- hex0
h <- hx0
h[vi] <- mod_indexes[M]
h <- .Internal(paste0(list(h), collapse = "", recycle0 = FALSE))

Expand Down Expand Up @@ -1213,7 +1214,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL,
mod_indexes = mod_indexes, digits = digits)
else
af <- calc_ms2ions_a1_vnl1_fnl0(
N = expand_grid_rows(vmods_nl[ms2vmods], nmax = nnl),
N = expand_grid_rows(vmods_nl[ms2vmods], nmax = nnl, use.names = FALSE),
M = M,
P = P,
aam = aam,
Expand Down Expand Up @@ -1243,8 +1244,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL,
mod_indexes = mod_indexes, digits = digits)
}
else {
M <- split_matrix(M, by = "row")
af <- lapply(M, calc_ms2ions_a1_vnl0_fnl0,
af <- lapply(split_matrix(M, by = "row"), calc_ms2ions_a1_vnl0_fnl0,
P = P, aam = aam, aa_masses = aa_masses,
ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions,
mod_indexes = mod_indexes, digits = digits)
Expand All @@ -1255,7 +1255,7 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL,
M <- split_matrix(M, by = "row")
l <- maxn_vmods_sitescombi_per_pep %/% n1

if (l == 1L) {
if (l <= 1L) {
af <- lapply(M, calc_ms2ions_a1_vnl0_fnl0,
P = P, aam = aam, aa_masses = aa_masses,
ntmass = ntmass, ctmass = ctmass, type_ms2ions = type_ms2ions,
Expand All @@ -1266,9 +1266,11 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL,
n2 <- n1 * prod(lengths(vmods_nl))

N <- if (n2 > maxn_vmods_sitescombi_per_pep)
lapply(M, function (x) expand_grid_rows(vmods_nl[x], nmax = 2L))
lapply(M, function (x)
expand_grid_rows(vmods_nl[x], nmax = 2L, use.names = FALSE))
else
lapply(M, function (x) expand_grid_rows(vmods_nl[x], nmax = maxn_vnl_per_seq))
lapply(M, function (x)
expand_grid_rows(vmods_nl[x], nmax = maxn_vnl_per_seq, use.names = FALSE))

af <- mapply(
calc_ms2ions_a1_vnl1_fnl0,
Expand Down Expand Up @@ -1316,16 +1318,14 @@ calc_ms2ions_a1_vnl1_fnl0 <- function (N, M, P, aam, aa_masses,
type_ms2ions = "by", mod_indexes,
digits = 4L)
{
naa <- length(aam)
hex0 <- rep_len("0", naa)

ds <- aa_masses[M]
nnl <- length(N)
nvm <- nrow(P)
len <- nvm * nnl
out <- vector("list", len)

r <- 1L
naa <- length(aam)
hx0 <- rep_len("0", naa)
r <- 1L

for (i in 1:nvm) {
vi <- P[i, ]
Expand All @@ -1341,7 +1341,7 @@ calc_ms2ions_a1_vnl1_fnl0 <- function (N, M, P, aam, aa_masses,
}

# both i and j must exist
h <- hex0
h <- hx0
h[vi] <- mod_indexes[M]
h <- .Internal(paste0(list(h), collapse = "", recycle0 = FALSE))

Expand Down
201 changes: 71 additions & 130 deletions R/ms2frames.R
Original file line number Diff line number Diff line change
Expand Up @@ -1036,8 +1036,8 @@ fuzzy_match_one2 <- function (x, y)
#'
#' @return Lists of (1) theo, (2) expt, (3) ith, (4) iex and (5) m.
find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL,
ppm_ms2 = 10L, min_ms2mass = 115L, minn_ms2 = 6L,
index_mgf_ms2 = FALSE)
ppm_ms2 = 10L, min_ms2mass = 115L, minn_ms2 = 6L,
index_mgf_ms2 = FALSE)
{
##############################################################################
# `theos`
Expand All @@ -1050,187 +1050,128 @@ find_ms2_bypep <- function (theos = NULL, expts = NULL, ex = NULL, d = NULL,
# and never get matched.
#
# ex: `expts` in integers
# th_i: the i-th `theos` in integers
# thi: the i-th `theos` in integers
#
# ex has no duplicated entries;
# th_i can.
# thi can.
#
# Forward matching: match(theos, expts)
# (i) allowed, e.g., b4- and y5 theo ions matched to the same ex value:
# match(c(2,2,3,4), c(1:2, 5:10))
# (ii) multiple ex' value's to the same th_i value not allowed;
# (ii) multiple ex' value's to the same thi value not allowed;
# otherwise longer length `c(expts[bps], expts[yps])` than lhs.
# e.g. ex's 74953, 74955 both fit to th_i 74954 and the best one is applied.
# (after a th_i is matched, it will be removed from further matching)
# e.g. ex's 74953, 74955 both fit to thi 74954 and the best one is applied.
# (after a thi is matched, it will be removed from further matching)
#
# Backward matching: match(expts, theos)
# (i) %in% and %fin% only shows the first match for duplicated entries th_i:
# (i) %in% and %fin% only shows the first match for duplicated entries thi:
# match(1:4, c(1, 2, 2, 5))
# (so no worry about th_i duplication)
# (so no worry about thi duplication)
##############################################################################

nullout <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL)
len <- length(theos)

if (!len)
return(list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL))
return(nullout)

# ---
out <- vector("list", len)

## forward matches
if (len > 3L) {
mths <- index_mz(.Internal(unlist(theos, recursive = FALSE, use.names = FALSE)),
min_ms2mass, d)
pss <- mths %fin% ex | (mths - 1L) %fin% ex | (mths + 1L) %fin% ex
pss <- fold_vec(pss, len)
mths <- fold_vec(mths, len)
ipss <- lapply(pss, function (x) .Internal(which(x)))
tines <- mths %fin% ex | (mths - 1L) %fin% ex | (mths + 1L) %fin% ex
tines <- fold_vec(tines, len)
mths <- fold_vec(mths, len)
iths <- lapply(tines, function (x) .Internal(which(x)))

for (i in 1:len) {
theos_i <- theos[[i]]
th_i <- mths[[i]]
ps <- pss[[i]]
ips <- ipss[[i]]

### the remaining are the same as those under "else" ###
theoi <- theos[[i]]
thi <- mths[[i]]
tine <- tines[[i]]
ith <- iths[[i]]
nth <- length(ith)

## backward matches
# expts are in ascending orders, but theos in b1, b2, ... , y1, y2, ...
# separate matches to theos_b and theos_y, each are in ascending order
n_ps <- length(ips)

if(n_ps >= minn_ms2) {
# separated b and y matches (to handled double-dipping between b and y)
# (adj: bps <- fuzzy_match_one2(ex, th_i[1:mid]))
lth <- length(ps)
mid <- lth/2L

# experimental es initially filled by NA and matched theoretical values,
# and at the end replaced with matched experimental values
es <- theos_i
es[!ps] <- NA_real_
if (nth >= minn_ms2) {
es <- theoi
es[!tine] <- NA_real_

ex_bf <- ex - 1L
ex_af <- ex + 1L
thok <- thi[ith]
tine2 <- fastmatch::fmatch(c(thok, thok - 1L, thok + 1L), ex) # 2us
iex <- vector("integer", nth)

# b-ions
y_1 <- th_i[1:mid]
ps_1 <- ex %fin% y_1 | ex_bf %fin% y_1 | ex_af %fin% y_1
ips_1 <- .Internal(which(ps_1))

# y-ions
y_2 <- th_i[(mid+1L):lth]
ps_2 <- ex %fin% y_2 | ex_bf %fin% y_2 | ex_af %fin% y_2
ips_2 <- .Internal(which(ps_2))
for (j in 1:nth) {
x <- tine2[[j]]

if (is.na(x)) {
y <- tine2[[j+nth]]

if (is.na(y)) {
iex[[j]] <- tine2[[j+nth*2]]
} else {
iex[[j]] <- y
}
} else {
iex[[j]] <- x
}
}

# b- and y-ions
expt_1 <- expts[ips_1]
expt_2 <- expts[ips_2]
expt_12 <- c(expt_1, expt_2)
ips_12 <- c(ips_1, ips_2)
len_12 <- length(expt_12)
nex <- length(iex)

# (occur rarely; OK to recalculate freshly `expt_12`)
if (n_ps != len_12) {
# should not occur?
if (nth != nex) {
# "* 2" for three-frame searches
# also ensure that "ith = ips" in ascending order, not "iex = ips_12"
out_i <- find_ppm_outer_bycombi(expts, theos_i, ppm_ms2 * 2L)
# also ensure that "ith = ith" in ascending order, not "iex = iex"
out_i <- find_ppm_outer_bycombi(expts, theoi, ppm_ms2 * 2L)

if (sum(!is.na(out_i[["expt"]])) < minn_ms2) {
out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL)
out[[i]] <- nullout
next
}

out[[i]] <- out_i
next
}

es[ps] <- expt_12

out[[i]] <- list(theo = theos_i, expt = es, ith = ips, iex = ips_12, m = len_12)
es[tine] <- expts[iex]
out[[i]] <- list(theo = theoi, expt = es, ith = ith, iex = iex, m = nex)
}
else
out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL)
out[[i]] <- nullout
}
}
else {
# mths <- lapply(theos, index_mz, min_ms2mass, d)
# pss <- lapply(mths, function (x) x %fin% ex | (x - 1L) %fin% ex | (x + 1L) %fin% ex)
# ipss <- lapply(pss, function (x) .Internal(which(x)))

for (i in 1:len) {
## forward matches
theos_i <- theos[[i]]
th_i <- index_mz(theos_i, min_ms2mass, d)
ps <- th_i %fin% ex | (th_i - 1L) %fin% ex | (th_i + 1L) %fin% ex
ips <- .Internal(which(ps))

## "ith = ips" in ascending order, not "iex = ips_12"

### the remaining are the same under "if" ###
theoi <- theos[[i]]
thi <- index_mz(theoi, min_ms2mass, d)
t2e <- fastmatch::fmatch(c(thi, thi - 1L, thi + 1L), ex, nomatch = 0L)
nth <- sum(t2e > 0L)

## backward matches
# expts are in ascending orders, but theos in b1, b2, ... , y1, y2, ...
# separate matches to theos_b and theos_y, each are in ascending order
n_ps <- length(ips)

if(n_ps >= minn_ms2) {
# separated b and y matches (to handled double-dipping between b and y)
# (adj: bps <- fuzzy_match_one2(ex, th_i[1:mid]))
lth <- length(ps)
mid <- lth/2L

# experimental es initially filled by NA and matched theoretical values,
# and at the end replaced with matched experimental values
es <- theos_i
es[!ps] <- NA_real_

ex_bf <- ex - 1L
ex_af <- ex + 1L

# b-ions
y_1 <- th_i[1:mid]
ps_1 <- ex %fin% y_1 | ex_bf %fin% y_1 | ex_af %fin% y_1
ips_1 <- .Internal(which(ps_1))
if (nth >= minn_ms2) {
l <- length(thi)
mi <- t2e[1:l]
bf <- t2e[(l + 1L):(l + l)]
af <- t2e[(l + l + 1L):(l + l + l)]

# y-ions
y_2 <- th_i[(mid+1L):lth]
ps_2 <- ex %fin% y_2 | ex_bf %fin% y_2 | ex_af %fin% y_2
ips_2 <- .Internal(which(ps_2))
okmi <- mi > 0L
okbf <- bf > 0L
okaf <- af > 0L

# b- and y-ions
expt_1 <- expts[ips_1]
expt_2 <- expts[ips_2]
expt_12 <- c(expt_1, expt_2)
ips_12 <- c(ips_1, ips_2)
len_12 <- length(expt_12)
ith <- c(.Internal(which(okmi)), .Internal(which(okbf)), .Internal(which(okaf)))
iex <- c(mi[okmi], bf[okbf], af[okaf])

# (occur rarely; OK to recalculate freshly `expt_12`)
if (n_ps != len_12) {
# "* 2" for three-frame searches
# also ensure that "ith = ips" in ascending order, not "iex = ips_12"
out_i <- find_ppm_outer_bycombi(expts, theos_i, ppm_ms2 * 2L)

if (sum(!is.na(out_i[["expt"]])) < minn_ms2) {
out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL)
next
}

out[[i]] <- out_i
next
}

es[ps] <- expt_12

out[[i]] <- list(theo = theos_i, expt = es, ith = ips, iex = ips_12, m = len_12)
es <- rep_len(NA_real_, l)
es[ith] <- expts[iex]
names(es) <- names(theoi)
out[[i]] <- list(theo = theoi, expt = es, ith = ith, iex = iex, m = nth)
}
else
out[[i]] <- list(theo = NULL, expt = NULL, ith = NULL, iex = NULL, m = NULL)
else {
out[[i]] <- nullout
}
}
}

names(out) <- names(theos)

out
}

Expand Down

0 comments on commit b2bcd7d

Please sign in to comment.