diff --git a/R/bin_masses.R b/R/bin_masses.R index 35fd638..952aab5 100644 --- a/R/bin_masses.R +++ b/R/bin_masses.R @@ -361,7 +361,7 @@ set_bin_ncores <- function (len_m, enzyme) else min(n_cores, len_m) - if (enzyme == "noenzyme") + if (isTRUE(enzyme == "noenzyme")) n_cores <- floor(n_cores/2L) max(1L, n_cores) diff --git a/R/mgfs.R b/R/mgfs.R index 20541ae..9f3171e 100644 --- a/R/mgfs.R +++ b/R/mgfs.R @@ -64,7 +64,7 @@ load_mgfs <- function (out_path, mgf_path, min_mass = 200L, max_mass = 4500L, ok_pars <- identical(call_pars, cache_pars) # suboptimal for handling matchMS_noenzyme() - if ((!ok_pars) && enzyme == "noenzyme") + if ((!ok_pars) && isTRUE(enzyme == "noenzyme")) ok_pars <- TRUE # checks processed mgfs @@ -1164,7 +1164,7 @@ extract_mgf_rptrs <- function (xvals, yvals, quant = "none", tmt_reporter_upper = 135.2, exclude_reporter_region = FALSE) { - if (grepl("^tmt.*\\d+", quant)) { + if (isTRUE(grepl("^tmt.*\\d+", quant))) { ok_rptrs <- lapply(xvals, function (x) x > tmt_reporter_lower & x < tmt_reporter_upper) rptr_moverzs <- mapply(function (x, y) x[y], xvals, ok_rptrs, @@ -1312,13 +1312,17 @@ find_mgf_type <- function (file) scan_pd <- "scans: \"\\d+\"" scan_default_pasef <- NULL - if (grepl(file_msconvert_thermo, ln_tit) && grepl(scan_msconv_thermo, ln_tit)) + if (isTRUE(grepl(file_msconvert_thermo, ln_tit)) && + isTRUE(grepl(scan_msconv_thermo, ln_tit))) "msconv_thermo" - else if (grepl(file_msconvert_pasef, ln_tit) && grepl(scan_msconv_pasef, ln_tit)) + else if (isTRUE(grepl(file_msconvert_pasef, ln_tit)) && + isTRUE(grepl(scan_msconv_pasef, ln_tit))) "msconv_pasef" - else if (grepl(file_pd, ln_tit) && grepl(scan_pd, ln_tit)) + else if (isTRUE(grepl(file_pd, ln_tit)) && + isTRUE(grepl(scan_pd, ln_tit))) "pd" - else if (grepl(file_default_pasef, ln_tit) && is.null(scan_default_pasef)) + else if (isTRUE(grepl(file_default_pasef, ln_tit)) && + isTRUE(is.null(scan_default_pasef))) "default_pasef" else stop("Unkown format of MGFs.") @@ -2020,6 +2024,8 @@ proc_mdda <- function (spec, raw_file, idx_sc = 3L, idx_osc = 3L, idx_mslev = 2L ms1_moverzs <- ms1_ints <- ms1_charges <- vector("list", len) ms0_moverzs <- ms0_ints <- ms0_charges <- character(len) + is_tmt <- if (isTRUE(grepl("^tmt.*\\d+", quant))) TRUE else FALSE + for (i in 1:len) { x <- spec[[i]] ids <- .Internal(strsplit(xml2::xml_attr(x, "id"), " ", fixed = TRUE, @@ -2032,9 +2038,7 @@ proc_mdda <- function (spec, raw_file, idx_sc = 3L, idx_osc = 3L, idx_mslev = 2L xc <- xml2::xml_children(x) ms_levs[[i]] <- ms_lev <- as.integer(xml2::xml_attr(xc[[idx_mslev]], "value")) scan_titles[[i]] <- xml2::xml_attr(xc[[idx_title]], "value") - - is_tmt <- if (grepl("^tmt.*\\d+", quant)) TRUE else FALSE - + if (ms_lev == 2L) { scanList <- xml2::xml_children(xc[[idx_scanList_2]]) scanList_ret <- xml2::xml_children(scanList[[idx_rt_2]]) @@ -2262,9 +2266,9 @@ proc_dia <- function (spec, raw_file, is_demux = FALSE, idx_sc = 5L, idx_osc = 3 ms_levs <- msx_ns <- integer(len) msx_moverzs <- msx_ints <- ms2_charges <- vector("list", len) ms1_moverzs <- ms1_ints <- ms1_charges <- vector("list", len) - demux <- if (is_demux) rep("0", len) else NULL + demux <- if (is_demux) rep_len("0", len) else NULL - is_tmt <- if (grepl("^tmt.*\\d+", quant)) TRUE else FALSE + is_tmt <- if (isTRUE(grepl("^tmt.*\\d+", quant))) TRUE else FALSE for (i in 1:len) { x <- spec[[i]] @@ -2477,7 +2481,7 @@ proc_dda <- function (spec, raw_file, idx_sc = 3L, idx_osc = 3L, msx_moverzs <- msx_ints <- ms2_charges <- vector("list", len) ms1_moverzs <- ms1_ints <- ms1_charges <- character(len) - is_tmt <- if (grepl("^tmt.*\\d+", quant)) TRUE else FALSE + is_tmt <- if (isTRUE(grepl("^tmt.*\\d+", quant))) TRUE else FALSE for (i in 1:len) { x <- spec[[i]] diff --git a/R/ms1_precursors.R b/R/ms1_precursors.R index fea449a..cd25c32 100644 --- a/R/ms1_precursors.R +++ b/R/ms1_precursors.R @@ -245,6 +245,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, is_fixed_protct <- any(grepl("Protein C-term", fixedmods)) # --- Forward sequences --- + # (enzyme can be NULL) if (isTRUE(enzyme == "noenzyme")) { if (max_len > 25L) warning("May be out of RAM at `max_len = ", max_len, "`.\n", @@ -334,7 +335,7 @@ calc_pepmasses2 <- function (aa_masses = NULL, fwd_peps <- chunksplit(fwd_peps, n_cores, "list") # (a) Optional semi-enzymatic peptides - if (grepl("^semi", enzyme)) { + if (isTRUE(grepl("^semi", enzyme))) { message("Generating semi-enzymatic peptides.") cl <- parallel::makeCluster(getOption("cl.cores", n_cores)) @@ -778,7 +779,7 @@ find_aa_site <- function (pos_site) { nms <- names(pos_site) - site <- if (grepl("[NC]{1}-term", nms)) + site <- if (isTRUE(grepl("[NC]{1}-term", nms))) gsub("(Protein|Any) ([NC]{1}-term)", "\\2", nms) else pos_site @@ -1955,9 +1956,25 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL inds_m <- grep("^M", fasta_db) if (is.null(enzyme)) { - patc <- custom_enzyme[["Cterm"]] - patn <- custom_enzyme[["Nterm"]] - + # already checked custom_enzyme names in matchMS + if ((len_enz <- length(custom_enzyme)) == 2L) { + patc <- custom_enzyme[["Cterm"]] + patn <- custom_enzyme[["Nterm"]] + } + else if (len_enz == 1L) { + if ((tm_enz <- names(custom_enzyme)) == "Cterm") { + patc <- custom_enzyme[["Cterm"]] + patn <- NULL + } + else if (tm_enz == "Nterm") { + patc <- NULL + patn <- custom_enzyme[["Nterm"]] + } + else { + stop("Custom enzyme terminal is not `Cterm` or `Nterm`.") + } + } + if (!is.null(patc)) { if (grepl("\\^", patc)) { fasta_db <- lapply(fasta_db, function (x) @@ -2024,7 +2041,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "argc"|| enzyme == "semiargc") { + else if (enzyme == "argc" || enzyme == "semiargc") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([R]{1})", paste0("\\1", "@"), x, @@ -2032,7 +2049,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "lysc_p"|| enzyme == "semilysc_p") { + else if (enzyme == "lysc_p" || enzyme == "semilysc_p") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([K]{1})([^P]{1})", paste0("\\1", "@", "\\2"), x, @@ -2040,7 +2057,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "chymotrypsin"|| enzyme == "semichymotrypsin") { + else if (enzyme == "chymotrypsin" || enzyme == "semichymotrypsin") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([FWY]{1})", paste0("\\1", "@"), x, @@ -2048,7 +2065,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "gluc"|| enzyme == "semigluc") { + else if (enzyme == "gluc" || enzyme == "semigluc") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([E]{1})", paste0("\\1", "@"), x, @@ -2056,7 +2073,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "glun"|| enzyme == "semiglun") { + else if (enzyme == "glun" || enzyme == "semiglun") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([E]{1})", paste0("@", "\\1"), x, @@ -2064,7 +2081,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "aspc"|| enzyme == "semiaspc") { + else if (enzyme == "aspc" || enzyme == "semiaspc") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([D]{1})", paste0("\\1", "@"), x, @@ -2072,7 +2089,7 @@ make_fastapeps0 <- function (fasta_db, enzyme = "trypsin_p", custom_enzyme = NUL fixed = FALSE, useBytes = FALSE)), "-")) } - else if (enzyme == "aspn"|| enzyme == "semiaspn") { + else if (enzyme == "aspn" || enzyme == "semiaspn") { fasta_db <- lapply(fasta_db, function (x) paste0("-", .Internal(gsub("([D]{1})", paste0("@", "\\1"), x, @@ -2867,7 +2884,8 @@ calcms1mass_noterm_byprot <- function (prot_peps, aa_masses, calcms1mass_noterm_bypep <- function (aa_seq, aa_masses, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L) { - if (is.na(aa_seq)) return(NULL) + if (is.na(aa_seq)) + return(NULL) aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) @@ -2915,15 +2933,22 @@ distri_peps <- function (prps, aa_masses_all, motifs_all, max_miss = 2L, # semi enzymes: don't know the maximum number of # C-term peptides that can end with "-" # (N-term remains the same) - n1 <- if (enzyme == "noenzyme") - Inf - else - (max_miss + 1L) * 2L - - n2 <- if (grepl("^semi", enzyme) || enzyme == "noenzyme") - Inf - else - ct_counts(max_miss) + + if (is.null(enzyme)) { + n1 <- (max_miss + 1L) * 2L + n2 <- ct_counts(max_miss) + } + else { + n1 <- if (enzyme == "noenzyme") + Inf + else + (max_miss + 1L) * 2L + + n2 <- if (grepl("^semi", enzyme) || enzyme == "noenzyme") + Inf + else + ct_counts(max_miss) + } # ZN207_HUMAN: MGRKKKK (no N-term pep_seq at 2 misses and min_len >= 7L) @@ -2945,7 +2970,7 @@ ct_counts <- function (max_miss = 2L) { ct <- integer(max_miss) - if (max_miss > 0L) { + if (max_miss) { ct[1] <- 2L for (i in 1:max_miss) { diff --git a/R/msmsmatches.R b/R/msmsmatches.R index 3667c94..af10745 100644 --- a/R/msmsmatches.R +++ b/R/msmsmatches.R @@ -829,8 +829,19 @@ matchMS <- function (out_path = "~/mzion/outs", if ((!is.na(topn_ms2ion_cuts)) && (topn_ms2ion_cuts == "")) topn_ms2ion_cuts <- NA if (!(is.numeric(ms1_notches) && length(ms1_notches))) ms1_notches <- 0 if (is.null(noenzyme_maxn)) noenzyme_maxn <- 0L - if ((!is.null(custom_enzyme)) && custom_enzyme == "") - custom_enzyme = c(Cterm = NULL, Nterm = NULL) + + if (!is.null(custom_enzyme)) { + if (!all(names(custom_enzyme) %in% c("Cterm", "Nterm"))) + stop("Custom enzyme terminals need to be `Cterm` and/or `Nterm`.") + + if (length(custom_enzyme) > 2L) + stop("Custom enzyme cannot have more than 2 terminals.") + + custom_enzyme <- custom_enzyme[custom_enzyme != ""] + + if (!length(custom_enzyme)) + custom_enzyme = c(Cterm = NULL, Nterm = NULL) + } oks <- fasta != "" @@ -1571,10 +1582,12 @@ matchMS <- function (out_path = "~/mzion/outs", if (bypass_protacc && file.exists(file_protacc)) df <- qs::qread(file_protacc) else { - if (enzyme != "noenzyme" || isTRUE(dots[["direct_prot_acc"]])) + if (is.null(enzyme) || + (enzyme != "noenzyme" || isTRUE(dots[["direct_prot_acc"]]))) { df <- add_protacc(out_path = out_path, .path_cache = .path_cache, .path_fasta = .path_fasta) + } else { silac_noenzyme <- if (isTRUE(dots$silac_noenzyme)) TRUE else FALSE @@ -2064,6 +2077,10 @@ check_tmt_pars <- function (fixedmods, varmods, quant) tmts <- fvmods[grepl("^TMT", fvmods)] + if (!length(tmts)) + warning("No fixed or variable modifications of TMT were specified at ", + "\"quant = ", quant, "\"") + if (quant == "tmt18") { ok <- all(grepl("TMTpro18.* |TMT18plex.* ", tmts)) diff --git a/R/scores.R b/R/scores.R index a985b50..61ddea2 100644 --- a/R/scores.R +++ b/R/scores.R @@ -1757,8 +1757,12 @@ prep_pepfdr_td <- function (td = NULL, out_path, enzyme = "trypsin_p", top3s <- cts$pep_mod_group[which_topx2(cts$n, 3)[1:3]] top3s <- top3s[!is.na(top3s)] - enzyme <- tolower(enzyme) - is_nes <- enzyme == "noenzyme" || grepl("^semi", enzyme) + if (is.null(enzyme)) + is_nes <- FALSE + else { + enzyme <- tolower(enzyme) + is_nes <- isTRUE(enzyme == "noenzyme") || isTRUE(grepl("^semi", enzyme)) + } if (!is_nes) if (!nes_fdr_group %in% c("all", "base"))