-
Notifications
You must be signed in to change notification settings - Fork 46
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Faster quantify functions #130
Comments
For completeness, here's some testing/devel code I wrote that made it into ## quantifies peak pk in regions [pk - wd, pk + wd] at half window
## size hws in spectrum spi in file f using max of peak (irrespective
## if spectrum is centroided or profile mode)
fastquant_1 <- function(f, pk, spi, hws, wd = 1) {
ramp <- new(mzR:::.__C__Rcpp_Ramp)
ramp$open(f, declaredOnly = TRUE)
if (!ramp$OK())
stop("Unable to create valid cRamp object.")
on.exit(ramp$close())
pks <- ramp$getPeakList(spi)$peaks
pks <- pks[pks[, 1] >= pk - wd & pks[, 1] <= pk + wd, ]
mx <- MALDIquant:::.localMaxima(pks[, 2], hws)
pks[mx, 2]
}
fastquant(x2[[372]], reporters = TMT6[1], method = "max")[[1]]
## TMT6.126 TMT6.127 TMT6.128 TMT6.129 TMT6.130 TMT6.131
## X422.1 56807640 60283748 63901644 57897180 54275908 53960332
fastquant_1(f, 126, 422, 20)
fastquant_1(f, 127, 422, 20)
fastquant_1(f, 128, 422, 20)
fastquant_1(f, 129, 422, 20)
fastquant_1(f, 130, 422, 20)
fastquant_1(f, 131, 422, 20)
r1 <- quantify(x2[[372]], reporters = TMT6[1], method = "max")[[1]]
names(r1) <- NULL
r2 <- fastquant_1(f, 126, 422, 20)
identical(r1, r2)
microbenchmark(quantify(x2[[372]], reporters = TMT6, method = "max")[[1]],
fastquant_1(f, 126, 422, 20),
times = 10) ## quantifies peaks pk in regions [pks - wd, pks + wd] at half window
## size hws in spectrum spi in file f using max of peak (irrespective
## if spectrum is centroided or profile mode)
fastquant_n <- function(f, pk, spi, hws, wd = 1) {
ramp <- new(mzR:::.__C__Rcpp_Ramp)
ramp$open(f, declaredOnly = TRUE)
if (!ramp$OK())
stop("Unable to create valid cRamp object.")
on.exit(ramp$close())
pks <- ramp$getPeakList(spi)$peaks
res <- rep(NA_real_, length(pk))
for (ii in seq_along(pk)) {
k <- pks[, 1] >= pk[ii] - wd & pks[, 1] <= pk[ii] + wd
.pks <- pks[k, , drop = FALSE]
mx <- MALDIquant:::.localMaxima(.pks[, 2], hws)
res[ii] <- .pks[mx, 2]
}
res
}
quantify(x2[[372]], reporters = TMT6, method = "max")[[1]]
fastquant_n(f, pk = mz(TMT6), 422, 20)
r1 <- quantify(x2[[372]], reporters = TMT6, method = "max")[[1]]
names(r1) <- NULL
r2 <- fastquant_n(f, mz(TMT6), 422, 20, wd = 0.5)
identical(r1, r2)
microbenchmark(quantify(x2[[372]], reporters = TMT6, method = "max")[[1]],
fastquant_n(f, mz(TMT6), 422, 20, wd = 0.5),
times = 1) ## quantifies peaks pk in regions [pks - wd, pks + wd] at half window
## size hws in spectra spi in file f using max of peak (irrespective
## if spectrum is centroided or profile mode)
fastquant_nn <- function(f, pk, spi, hws, wd = 0.5) {
ramp <- openMSfile(f)
on.exit(close(ramp))
pks <- peaks(ramp, spi)
res <- matrix(NA_real_,
ncol = length(pk),
nrow = length(pks))
for (i in seq_along(pks)) {
for (ii in seq_along(pk)) {
k <- pks[[i]][, 1] >= pk[ii] - wd & pks[[i]][, 1] <= pk[ii] + wd
if (any(k)) {
.pks <- pks[[i]][k, , drop = FALSE]
mx <- MALDIquant:::.localMaxima(.pks[, 2], hws)
res[i, ii] <- .pks[mx, 2]
}
}
}
res
}
library("MSnbase")
x2 <- readMSData2(f, msLevel = 2, centroided = TRUE)
ii <- fData(x2)$spIdx
library("microbenchmark")
microbenchmark(r1 <- quantify(x2, reporters = TMT6, verbose = FALSE,
method = "max", BPPARAM = SerialParam()),
r2 <- fastquant_nn(f, pk = mz(TMT6), spi = ii, hws = 20L, wd = 0.05),
times = 5)
## Unit: milliseconds
## min lq mean median uq max neval
## 18720.0183 18758.3670 22544.4527 19908.1385 27524.8491 27810.891 5
## 468.5377 471.3297 517.8705 472.1369 577.6511 599.697 5
## min lq mean median uq max neval
## 18695.8108 18908.834 19187.2583 18963.9148 19216.6528 20151.0787 5
## 468.3613 469.569 471.0501 471.4552 471.9763 473.8886 5
r1 <- exprs(r1)
attributes(r1) <- attributes(r1)[1]
identical(r1, r2)
## Code to inspect if not identical
sel <- !is.na(r1) & !is.na(r2)
r1[!sel] <- 0
r2[!sel] <- 0
table(r1 == r2)
kk <- arrayInd(which(!r1 == r2), dim(r1))
i <- sort(unique(kk[, 1]))
r1[i, ]
r2[i, ]
ssp <- lapply(spectra(x2[i]), as, "data.frame")
for (j in 1:length(ssp)) {
x <- ssp[[j]]
x <- x[x[, 1] > 120 & x[, 1] < 132, ]
plot(x, type = "h", xlim = c(125, 132), ylim = c(0, max(x, 2)))
grid()
abline(v = mz(TMT6), col = "red", lty = "dotted")
points(mz(TMT6), r1[i[j], ])
points(mz(TMT6), r2[i[j], ], col = "red", pch = 3)
tmp <- rbind(r1[i[j], ], r2[i[j], ])
colnames(tmp) <- mz(TMT6)
print(tmp)
scan()
} f2 <- "~/Data2/PXD000001-new-files/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"
library("MSnbase")
x2 <- readMSData2(f, msLevel = 2, centroided = TRUE)
x <- readMSData(f, msLevel = 2, centroided = TRUE)
qe0 <- quantify(x, reporters = TMT6, verbose = FALSE, method = "max")
qe00 <- quantify(x2, reporters = TMT6, verbose = FALSE, method = "max")
all.equal(exprs(qe0), exprs(qe00), check.attributes = FALSE)
x2 <- readMSData2(f2, msLevel = 2, centroided = TRUE)
qe00 <- quantify(x2, reporters = TMT6, method = "max", BPPARAM = MulticoreParam(4L))
qe <- MSnbase:::fastquant_max(f2, pk = mz(TMT6), spi = fData(x2)$spIdx,
hws = 20L, wd = 0.05)
all.equal(qe, exprs(qe00), check.attributes = FALSE) I will document these benchmarks in the |
This is for TMT quantitation. So we are just interested in the highest peak in a reporter region (not in all local maxima). While ## reporter mz
pk <- 10
## stupid spectrum with two local maxima in a reporter region
pks <- list(cbind(mz=c(seq(9.2, 10.8, by=0.1)),
intensity=c(1:5, 4:1, 2:6, 5:3)))
## modified fastquant_max to work without a file
fastquant_max_org <- function(pks, pk, hws=20L, wd = 0.5) {
res <- matrix(NA_real_,
ncol = length(pk),
nrow = length(pks))
for (i in seq_along(pks)) {
for (ii in seq_along(pk)) {
k <- pks[[i]][, 1] >= pk[ii] - wd & pks[[i]][, 1] <= pk[ii] + wd
if (any(k)) {
.pks <- pks[[i]][k, , drop = FALSE]
mx <- MALDIquant:::.localMaxima(.pks[, 2], hws)
res[i, ii] <- .pks[mx, 2]
}
}
}
res
}
## small hws
fastquant_max_org(pks, pk, hws=2)
# Error in res[i, ii] <- .pks[mx, 2] :
# number of items to replace is not a multiple of replacement length I replaced |
Thanks @sgibb. The function currently is used for iTRAQ/TMT quantitation indeed, but I would like it to be more general, or have one available for any peak (MS1 or MS2, centroided or profile). In your example, I would actually want the second max at M/Z 10.5 to be chosen. In that respect, the org function does have a bug, but I don't think using |
Quantification needs to be reimplemented at the C-level or using on disk access and using basic data types to make full use of the
OnDiskMSnExp
data structure.Quantitation involves peak integration, which will be useful for MS1 and MS2 quantitation. The way this is done will depend if data is centroided (only really relevant for MS2 data) or profile mode. When in profile mode, the peak is a curve; in centroided mode, the peak is a sticky peak.
What follows is a crude description of the needs. I will refine later.
quantify
range
findPeak(mz, tolerance)
quantify(method = "max")
quantify(method = "trapezoidation")
findPeak(mz, tolerance)
MALDIquant:::.localMaxima
, with appropriatehalfWindowSize
.want to to find the lower/left and upper/right ends of the curve to integrate.
Summary of
quantify
methods:max
fastquant_max
SI
SI
SI
SAF
SAF
count_MSnSet
tic_MSnSet
Some functions are the same (after minor modifications) than for the
MSnExp
:count_MSnSet
,tic_MSnSet
,SI
andSAF
.[1] this one could actually climb up outide of
mz
+/-wd
to detect to maximum.[2] actually not directly available as a
method
, used bySI
,SIgi
andSIn
.The text was updated successfully, but these errors were encountered: