diff --git a/DESCRIPTION b/DESCRIPTION index 591921a..ba4c39b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mzion Type: Package Title: Database Searches of Proteomic Mass-spectrometrirc Data -Version: 1.2.5 +Version: 1.2.5.2 Authors@R: person(given = "Qiang", family = "Zhang", diff --git a/R/funs.R b/R/funs.R index 9e5bb2e..58070ab 100644 --- a/R/funs.R +++ b/R/funs.R @@ -3,8 +3,8 @@ # [6] "check_ms2frames" # # $bin_masses.R -# [1] "bin_ms1masses" "binTheoSeqs_i" "binTheoSeqs2" "bin_theoseqs" "binTheoSeqs" -# [6] "find_ms1_cutpoints" "s_readRDS" +# [1] "bin_ms1masses" "binTheoSeqs_i" "binTheoSeqs2" "bin_theoseqs" "binTheoSeqs" "find_ms1_cutpoints" +# [7] "s_readRDS" # # $dispatch.R # [1] "find_pos_site" "contain_pos_site" "contain_termpos_any" "subset_by_prps" "subset_protntsite" @@ -19,23 +19,21 @@ # character(0) # # $ion_ladder.R -# [1] "ms2ions_by_type" "byions" "czions" "axions" "bions_base" "yions_base" -# [7] "b2ions_base" "bstarions" "bstar2ions" "b0ions" "b02ions" "y2ions" -# [13] "ystarions" "ystar2ions" "y0ions" "y02ions" "cions_base" "c2ions" -# [19] "zions_base" "z2ions" "aions_base" "a2ions" "astarions" "astar2ions" -# [25] "a0ions" "a02ions" "xions_base" "x2ions" +# [1] "ms2ions_by_type" "byions" "czions" "axions" "bions_base" "yions_base" "b2ions_base" +# [8] "bstarions" "bstar2ions" "b0ions" "b02ions" "y2ions" "ystarions" "ystar2ions" +# [15] "y0ions" "y02ions" "cions_base" "c2ions" "zions_base" "z2ions" "aions_base" +# [22] "a2ions" "astarions" "astar2ions" "a0ions" "a02ions" "xions_base" "x2ions" # # $mapMS2ions.R -# [1] "mapMS2ions" "match_mgf_path" "match_raw_id" "add_raw_ids" -# [5] "find_secion_types" "find_psm_rows" "find_psm_rows1" "find_psm_rows2" -# [9] "find_theoexpt_pair" "find_mgf_query" "combine_prisec_matches" "check_existed_psms" -# [13] "get_mzion_coltypes" +# [1] "mapMS2ions" "match_mgf_path" "match_raw_id" "add_raw_ids" "find_secion_types" +# [6] "find_psm_rows" "find_psm_rowsQ" "find_psm_rowsC" 
"find_theoexpt_pair" "find_mgf_query" +# [11] "combine_prisec_matches" "check_existed_psms" "get_mzion_coltypes" # # $mgfs.R -# [1] "load_mgfs" "readMGF" "post_readmgf" "readlineMGFs" " f" -# [6] "read_mgf_chunks" "proc_mgf_chunks" "proc_mgfs" "sub_mgftopn" "integerize_ms2ints" -# [11] "extract_mgf_rptrs" "find_ms1_interval" "index_mz" "find_mgf_type" "readmzML" -# [16] "proc_mzml" "read_mzml" +# [1] "load_mgfs" "readMGF" "post_readmgf" "readlineMGFs" " f" "read_mgf_chunks" +# [7] "proc_mgf_chunks" "proc_mgfs" "sub_mgftopn" "integerize_ms2ints" "extract_mgf_rptrs" "find_ms1_interval" +# [13] "index_mz" "find_mgf_type" "readmzML" "proc_mzml" "read_mzml" "prepBrukerMGF" +# [19] "mprepBrukerMGF" # # $ms1_precursors.R # [1] "calc_pepmasses2" "find_aa_masses" "find_motif_pat" "simple_prots_peps" @@ -54,29 +52,21 @@ # [53] "delta_ms1_a0_fnl1" "hms1_a0_vnl0_fnl1" "ms1_a0_vnl0_fnl1" "hms1_a1_vnl0_fnl0" # [57] "ms1_a1_vnl0_fnl0" # -# $ms2_a0_vnl0_fnl1.R -# [1] "ms2match_a0_vnl0_fnl1" "gen_ms2ions_a0_vnl0_fnl1" +# $ms2_gen.R +# [1] "gen_ms2ions_base" "gen_ms2ions_a0_vnl0_fnl1" "gen_ms2ions_a1_vnl0_fnl0" "calc_ms2ions_a1_vnl0_fnl0" +# [5] "check_ms1_mass_vmods2" "add_hexcodes" "gen_ms2ions_a1_vnl0_fnl1" "calc_ms2ions_a1_vnl0_fnl1" +# [9] "add_hexcodes_fnl2" "gen_ms2ions_a1_vnl1_fnl0" "calc_ms2ions_a1_vnl1_fnl0" "add_hexcodes_vnl2" # -# $ms2_a1_vnl0_fnl0.R -# [1] "ms2match_a1_vnl0_fnl0" "gen_ms2ions_a1_vnl0_fnl0" "calc_ms2ions_a1_vnl0_fnl0" "check_ms1_mass_vmods2" -# [5] "add_hexcodes" -# -# $ms2_a1_vnl0_fnl1.R -# [1] "ms2match_a1_vnl0_fnl1" "gen_ms2ions_a1_vnl0_fnl1" "calc_ms2ions_a1_vnl0_fnl1" "add_hexcodes_fnl2" -# -# $ms2_a1_vnl1_fnl0.R -# [1] "ms2match_a1_vnl1_fnl0" "gen_ms2ions_a1_vnl1_fnl0" "calc_ms2ions_a1_vnl1_fnl0" "add_hexcodes_vnl2" -# -# $ms2_base.R -# [1] "ms2match_base" "frames_adv" "gen_ms2ions_base" "fuzzy_match_one" "fuzzy_match_one2" "find_ms2_bypep" -# [7] "search_mgf" +# $ms2frames.R +# [1] "pair_mgftheo" "hms2match" "ms2match_all" "mframes_adv" 
"fuzzy_match_one" "fuzzy_match_one2" +# [7] "find_ms2_bypep" "search_mgf" "ms2match_one" "frames_adv" # # $msmsmatches.R -# [1] "matchMS" "try_psmC2Q" "reproc_psmC" "psmC2Q" "post_psmC2Q" "check_tmt_pars" -# [7] "checkMGF" "check_locmods" "map_raw_n_scan" "check_fdr_group" +# [1] "matchMS" "try_psmC2Q" "reproc_psmC" "psmC2Q" "post_psmC2Q" "check_tmt_pars" "checkMGF" +# [8] "check_locmods" "map_raw_n_scan" "check_fdr_group" # # $msmsmatches2.R -# [1] "ms2match" "hcalc_tmtint" "reverse_peps_in_frame" "reverse_seqs" "calib_ms1masses" +# [1] "ms2match" "reverse_peps_in_frame" "reverse_seqs" "calib_mgf" "calib_ms1" # # $mzion.R # character(0) @@ -85,29 +75,28 @@ # [1] "make_mztab" # # $percolator.R -# [1] "creat_folds" "cv_svm" "perco_svm" "probco_bypepcharge" "calc_z_pepfdr" +# [1] "creat_folds" "cv_svm" "perco_svm" # # $quant2.R -# [1] "calc_tmtint" "add_rptrs" "find_reporter_ints" "find_reporters_ppm" "msub_protpep" -# [6] "sub_protpep" "add_protacc2" "add_protacc" "hannot_decoys" "groupProts" -# [11] "map_pepprot" "collapse_sortpeps" "pcollapse_sortpeps" "chunksplit_spmat" "find_group_breaks" -# [16] "cut_proteinGroups" "sparseD_fourquad" "as_dist" "as_lgldist" "greedysetcover3" +# [1] "hcalc_tmtint" "calc_tmtint" "add_rptrs" "find_reporter_ints" "find_reporters_ppm" "msub_protpep" +# [7] "sub_protpep" "add_protacc2" "add_protacc" "hannot_decoys" "groupProts" "map_pepprot" +# [13] "collapse_sortpeps" "pcollapse_sortpeps" "chunksplit_spmat" "find_group_breaks" "cut_proteinGroups" "sparseD_fourquad" +# [19] "as_dist" "greedysetcover3" # # $roadmaps.R # character(0) # # $scores.R -# [1] "add_seions" "list_leftmatch" "calc_probi_byvmods" "calc_probi_bypep" -# [5] "calc_probi" "scalc_pepprobs" "calc_pepprobs_i" "calc_pepscores" -# [9] "find_decoy" "find_targets" "calcpepsc" "add_primatches" -# [13] "collapse_vecs" "post_pepscores" "find_pepscore_co1" "find_pepscore_co2" -# [17] "probco_bypeplen" "sub_td_byfdrtype" "find_optlens" "find_probco_valley" -# [21] 
"prep_pepfdr_td" "keep_pepfdr_best" "calc_pepfdr" "fill_probco_nas" -# [25] "fill_probs" "post_pepfdr" "calc_protfdr" "aggr_prot_es" -# [29] "calc_protfdr_i" "fit_protfdr" " f" "find_ppm_outer_bycombi" -# [33] "match_ex2th2" "calc_peploc" "calcpeprank_1" "calcpeprank_2" -# [37] "calcpeprank_3" "find_chunkbreaks" "findLocFracsDF" "concatFracs" -# [41] "na.interp" "is.constant" "tsoutliers" +# [1] "add_seions" "list_leftmatch" "calc_probi_byvmods" "calc_probi_bypep" "calc_probi" +# [6] "scalc_pepprobs" "calc_pepprobs_i" "calc_pepscores" "split_im" "order_fracs" +# [11] "combine_fracs" "move_scfiles" "find_decoy" "find_targets" "calcpepsc" +# [16] "hadd_primatches" "add_primatches" "collapse_vecs" "post_pepscores" "find_pepscore_co1" +# [21] "find_pepscore_co2" "probco_bypeplen" "sub_td_byfdrtype" "find_optlens" "find_probco_valley" +# [26] "prep_pepfdr_td" "keep_pepfdr_best" "calc_pepfdr" "fill_probco_nas" "fill_probs" +# [31] "post_pepfdr" "calc_protfdr" "aggr_prot_es" "calc_protfdr_i" "fit_protfdr" +# [36] " f" "find_ppm_outer_bycombi" "match_ex2th2" "calc_peploc" "calcpeprank_1" +# [41] "calcpeprank_2" "calcpeprank_3" "find_chunkbreaks" "findLocFracsDF" "concatFracs" +# [46] "na.interp" "is.constant" "tsoutliers" # # $silac.R # [1] "matchMS_silac_mix" "matchMS_par_groups" "add_fixedlab_masses" "matchMS_noenzyme" "combine_ion_matches" @@ -122,22 +111,21 @@ # # $utils_engine.R # [1] "which_topx" "which_topx2" "get_topn_vals" "insVal" "topx" -# [6] "find_ppm_error" "find_mass_error_range" "`%+%`" "post_ms2match" "`%+%`" -# [11] "post_frame_adv" "purge_search_space" "subset_theoframes" "subset_neuloss_peps" "find_nterm_mass" -# [16] "find_cterm_mass" "quick_rightjoin" "quick_leftjoin" "detect_cores" "find_free_mem" -# [21] "find_mod_indexes" "is_equal_sets" "expand_grid_rows" "count_elements" "vec_to_list" -# [26] "split_vec" "accumulate_char" "combi_mat" "make_zero_df" "calc_threeframe_ppm" -# [31] "check_ms1calib" "save_ms1calib" "get_ms1charges" "finds_uniq_vec" 
"my_dataframe" +# [6] "find_ppm_error" "find_mass_error_range" "`%+%`" "`%+%`" "post_frame_adv" +# [11] "subset_theoframes" "subset_neuloss_peps" "find_nterm_mass" "find_cterm_mass" "quick_rightjoin" +# [16] "quick_leftjoin" "detect_cores" "find_free_mem" "find_mod_indexes" "is_equal_sets" +# [21] "expand_grid_rows" "expand_grid" "expand_grid_rows0" "count_elements" "vec_to_list" +# [26] "split_vec" "fold_vec" "rep_vec" "accumulate_char" "combi_mat" +# [31] "make_zero_df" "calc_threeframe_ppm" "get_ms1charges" "finds_uniq_vec" "my_dataframe" # [36] "flatten_list" "calc_rev_ms2" "bind_dfs" # # $utils_os.R -# [1] "`names_pos<-`" "find_int_cols" "ins_cols_after" "add_cols_at" -# [5] "replace_cols_at" "reloc_col_after" "reloc_col_after_last" "reloc_col_after_first" -# [9] "reloc_col_before" "reloc_col_before_last" "reloc_col_before_first" "find_preceding_colnm" -# [13] "recur_flatten" "chunksplit" "chunksplitLB" "find_dir" -# [17] "create_dir" "save_call2" "find_callarg_vals" "match_calltime" -# [21] "delete_files" "find_ms1_times" "get_globalvar" "load_cache_info" -# [25] "is_nulllist" "add_nulllist" +# [1] "`names_pos<-`" "find_int_cols" "ins_cols_after" "add_cols_at" "replace_cols_at" +# [6] "reloc_col_after" "reloc_col_after_last" "reloc_col_after_first" "reloc_col_before" "reloc_col_before_last" +# [11] "reloc_col_before_first" "find_preceding_colnm" "recur_flatten" "chunksplit" "chunksplitLB" +# [16] "find_dir" "create_dir" "save_call2" "find_callarg_vals" "match_calltime" +# [21] "delete_files" "find_ms1_times" "get_globalvar" "load_cache_info" "is_nulllist" +# [26] "add_nulllist" # # $utils_ui.R # [1] "calc_monopeptide" "calc_monopep" "check_aaseq" "calc_ms2ionseries" "calc_ms2ions" "unique_mvmods" diff --git a/R/mapMS2ions.R b/R/mapMS2ions.R index 4ef5cf0..68ab592 100644 --- a/R/mapMS2ions.R +++ b/R/mapMS2ions.R @@ -260,14 +260,14 @@ find_secion_types <- function (type_ms2ions = "by") find_psm_rows <- function (file_t0, file_t1, file_t2, file_t3, scan, 
raw_file, rank = 1L, is_decoy = FALSE) { - psm <- find_psm_rows1(file_t1 = file_t1, file_t2 = file_t2, file_t3 = file_t3, + psm <- find_psm_rowsQ(file_t1 = file_t1, file_t2 = file_t2, file_t3 = file_t3, scan = scan, raw_file = raw_file, rank = rank, is_decoy = is_decoy) nrow <- nrow(psm) if (!nrow) { - psm <- find_psm_rows2(file_t0 = file_t0, scan = scan, raw_file = raw_file, + psm <- find_psm_rowsC(file_t0 = file_t0, scan = scan, raw_file = raw_file, rank = rank, is_decoy = is_decoy) nrow <- nrow(psm) } @@ -288,7 +288,7 @@ find_psm_rows <- function (file_t0, file_t1, file_t2, file_t3, scan, raw_file, #' Extracts the first row of matched PSMs from tiers 1-3. #' #' @inheritParams find_psm_rows -find_psm_rows1 <- function (file_t1, file_t2, file_t3, scan, raw_file, +find_psm_rowsQ <- function (file_t1, file_t2, file_t3, scan, raw_file, rank = 1L, is_decoy = FALSE) { ok <- any(ls(all.names = TRUE, envir = .GlobalEnv) == ".psms") @@ -343,7 +343,7 @@ find_psm_rows1 <- function (file_t1, file_t2, file_t3, scan, raw_file, #' Extracts the first row of matched PSMs from psmC. #' #' @inheritParams find_psm_rows -find_psm_rows2 <- function (file_t0, scan, raw_file, rank = 1L, +find_psm_rowsC <- function (file_t0, scan, raw_file, rank = 1L, is_decoy = FALSE) { ok <- any(ls(all.names = TRUE, envir = .GlobalEnv) == ".psmC") @@ -384,6 +384,7 @@ find_theoexpt_pair <- function (psm, out_path, scan, raw_id, is_decoy = FALSE) if (! 
x %in% col_nms) stop("PSM column not found: ", x) }) + # is.na(psm$pep_ivmod) with decoy entries pep_seq <- psm$pep_seq pep_ivmod <- psm$pep_ivmod @@ -455,6 +456,9 @@ find_theoexpt_pair <- function (psm, out_path, scan, raw_id, is_decoy = FALSE) theoexpt <- ion_match$matches[[1]] # (1) matched by `pep_seq` + if (is_decoy) + names(theoexpt) <- reverse_seqs(names(theoexpt)) + theoexpt <- theoexpt[names(theoexpt) == pep_seq] if (length(theoexpt) > 1L) { @@ -467,7 +471,8 @@ find_theoexpt_pair <- function (psm, out_path, scan, raw_id, is_decoy = FALSE) # (2) matched by pep_ivmod # (pep_seq matched but can still have multiple pep_ivmod's) - theoexpt <- theoexpt[names(theoexpt) == pep_ivmod] + if (!is_decoy) + theoexpt <- theoexpt[names(theoexpt) == pep_ivmod] # (can have multiple NLs) if (length(theoexpt) > 1L) { diff --git a/R/ms2_gen.R b/R/ms2_gen.R index c86749a..1e5bf9c 100644 --- a/R/ms2_gen.R +++ b/R/ms2_gen.R @@ -26,8 +26,8 @@ #' varmods <- c("TMT6plex (N-term)", "Acetyl (Protein N-term)", "Oxidation (M)", #' "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -61,8 +61,8 @@ #' fixedmods <- c("TMT6plex (N-term)", "TMT6plex (K)", "Carbamidomethyl (C)") #' varmods <- c("Oxidation (M)", "Deamidated (N)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -102,16 +102,15 @@ gen_ms2ions_base <- function (aa_seq = NULL, ms1_mass = NULL, maxn_sites_per_vmod = 3L, # dummy - maxn_fnl_per_seq = 64L, maxn_vnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, - digits = 4L) { aas <- 
.Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) aam <- aa_masses[aas] - l <- length(aas) + l <- length(aas) nm <- .Internal(paste0(list(rep("0", l)), collapse = "", recycle0 = FALSE)) # currently no subsetting by ms1_mass af <- ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions, digits) @@ -133,14 +132,13 @@ gen_ms2ions_base <- function (aa_seq = NULL, ms1_mass = NULL, #' @examples #' \donttest{ #' library(mzion) -#' library(magrittr) #' #' # (5) "amods- tmod+ vnl- fnl+" #' fixedmods <- c("TMT6plex (N-term)", "Oxidation (M)", "dHex (S)") #' varmods <- c("Acetyl (Protein N-term)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -169,12 +167,12 @@ gen_ms2ions_base <- function (aa_seq = NULL, ms1_mass = NULL, #' # variable `TMT6plex (N-term)` + `fixed Oxidation (M)` #' # (additive varmod on top of fixedmod allowed) #' -#' out <- mzion:::gen_ms2ions_a0_vnl0_fnl1(aa_seq = aa_seq, ms1_mass = NULL, -#' aa_masses = aa_masses, ntmod = NULL, ctmod = NULL, -#' ntmass = ntmass, ctmass = ctmass, -#' amods = NULL, vmods_nl = NULL, fmods_nl = fmods_nl, -#' mod_indexes = mod_indexes) -#' +#' out <- mzion:::gen_ms2ions_a0_vnl0_fnl1( +#' aa_seq = aa_seq, ms1_mass = NULL, +#' aa_masses = aa_masses, ntmod = NULL, ctmod = NULL, +#' ntmass = ntmass, ctmass = ctmass, +#' amods = NULL, vmods_nl = NULL, fmods_nl = fmods_nl, +#' mod_indexes = mod_indexes) #' } gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, aa_masses = NULL, ms1vmods = NULL, ms2vmods = NULL, @@ -185,10 +183,10 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, mod_indexes = NULL, type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, - maxn_fnl_per_seq = 8L, + maxn_fnl_per_seq = 3L, 
# dummy - maxn_vnl_per_seq = 8L, + maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, digits = 4L) @@ -197,8 +195,12 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, # (no pep_seq dispatching by Anywhere fmod residues -> possible no matched sites) sites <- names(fmods_nl) - pattern <- .Internal(paste0(list(sites), collapse = "|", recycle0 = FALSE)) + pattern <- if (length(sites) > 1L) + .Internal(paste0(list(sites), collapse = "|", recycle0 = FALSE)) + else + sites + if (!grepl(pattern, aa_seq)) return( gen_ms2ions_base(aa_seq = aa_seq, ms1_mass = ms1_mass, @@ -225,7 +227,7 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, # At fixedmods "Oxidation (M)", pep_seq(s) may not contain "M"; # (as `distri_peps` does not filter pep_seq by fixedmods) - idxes <- .Internal(which(aas %in% names(fmods_nl))) + idxes <- .Internal(which(aas %fin% names(fmods_nl))) if (length(idxes) > maxn_vmods_per_pep) idxes <- idxes[1:maxn_vmods_per_pep] @@ -233,9 +235,23 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, # --- fmods_combi <- aas[idxes] names(fmods_combi) <- idxes - fnl_combi <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq, - use.names = FALSE) - len <- length(fnl_combi) + + if (length(fmods_combi) == 1L) { + fnls <- fmods_nl[[fmods_combi]] + len <- length(fnls) + ans <- vector("list", len) + + for (i in 1:len) { + ans[[i]] <- fnls[[i]] + names(ans[[i]]) <- fmods_combi + } + } + else { + ans <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq, + use.names = FALSE) + len <- length(ans) + } + av <- af <- vector("list", len) aam <- aa_masses[aas] af[[1]] <- af1 <- ms2ions_by_type(aam, ntmass, ctmass, type_ms2ions, digits) @@ -246,8 +262,7 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, aamii <- aami[idxes] for (i in 2:len) { - fnl_combi_i <- fnl_combi[[i]] - + fnl_combi_i <- ans[[i]] aami[idxes] <- aamii - fnl_combi_i af[[i]] <- afi <- ms2ions_by_type(aami, ntmass, ctmass, 
type_ms2ions, digits) av[[i]] <- calc_rev_ms2(afi, aas) @@ -280,15 +295,14 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, #' @examples #' \donttest{ #' library(mzion) -#' library(magrittr) #' #' # (8a) "amods+ tmod+ vnl- fnl-" #' fixedmods <- c("TMT6plex (K)") #' varmods <- c("Deamidated (N)", "Carbamidomethyl (S)", #' "Acetyl (Protein N-term)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -346,8 +360,8 @@ gen_ms2ions_a0_vnl0_fnl1 <- function (aa_seq, ms1_mass = NULL, #' fixedmods <- sort(fixedmods) #' varmods <- sort(varmods) #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -407,7 +421,7 @@ gen_ms2ions_a1_vnl0_fnl0 <- function (aa_seq, ms1_mass = NULL, aa_masses = NULL, maxn_sites_per_vmod = 3L, # dummy - maxn_fnl_per_seq = 64L, maxn_vnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, digits = 4L) @@ -578,14 +592,13 @@ add_hexcodes <- function (ms2ions, vmods_combi, len, mod_indexes = NULL) #' @examples #' \donttest{ #' library(mzion) -#' library(magrittr) #' #' # (12) "amods+ tmod+ vnl- fnl+" #' fixedmods <- c("TMT6plex (K)", "Oxidation (M)", "dHex (S)") #' varmods <- c("Deamidated (N)", "Acetyl (Protein N-term)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -653,11 +666,10 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 
3L, maxn_vmods_sitescombi_per_pep = 64L, - maxn_fnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, # dummy - maxn_vnl_per_seq = 64L, - + maxn_vnl_per_seq = 3L, digits = 4L) { # (7, 8) "amods+ tmod- vnl- fnl-", "amods+ tmod+ vnl- fnl-" @@ -707,12 +719,25 @@ gen_ms2ions_a1_vnl0_fnl1 <- function (aa_seq = NULL, ms1_mass = NULL, fnl_idxes <- .Internal(which(aas %in% names(fmods_nl))) fmods_combi <- aas[fnl_idxes] names(fmods_combi) <- fnl_idxes - fnl_combi <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq) + + if (length(fmods_combi) == 1L) { + fnls <- fmods_nl[[fmods_combi]] + len <- length(fnls) + ans <- vector("list", len) + + for (i in 1:len) { + ans[[i]] <- fnls[[i]] + names(ans[[i]]) <- fmods_combi + } + } + else { + ans <- expand_grid_rows(fmods_nl[fmods_combi], nmax = maxn_fnl_per_seq) + } # go through each vmods_combi af <- lapply(vmods_combi, calc_ms2ions_a1_vnl0_fnl1, - fnl_combi, fnl_idxes, aam, aa_masses, ntmass, ctmass, + ans, fnl_idxes, aam, aa_masses, ntmass, ctmass, type_ms2ions, digits = digits) af <- mapply( @@ -805,15 +830,14 @@ add_hexcodes_fnl2 <- function (ms2ions, vmods_combi, len, mod_indexes = NULL) #' @examples #' \donttest{ #' library(mzion) -#' library(magrittr) #' #' # (10) "amods+ tmod+ vnl+ fnl-" #' fixedmods <- c("TMT6plex (K)") #' varmods <- c("dHex (S)", "Oxidation (M)", "Deamidated (N)", #' "Acetyl (Protein N-term)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -882,8 +906,8 @@ add_hexcodes_fnl2 <- function (ms2ions, vmods_combi, len, mod_indexes = NULL) #' varmods <- c("Acetyl (Protein N-term)", "Oxidation (M)", #' "Carbamidomethyl (M)") #' -#' mod_indexes <- seq_along(c(fixedmods, varmods)) %>% -#' as.hexmode() %>% +#' mod_indexes <- seq_along(c(fixedmods, varmods)) |> +#' as.hexmode() |> #' `names<-`(c(fixedmods, 
varmods)) #' #' aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -947,9 +971,9 @@ gen_ms2ions_a1_vnl1_fnl0 <- function (aa_seq = NULL, ms1_mass = NULL, maxn_vmods_sitescombi_per_pep = 64L, # dummy - maxn_fnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, - maxn_vnl_per_seq = 64L, + maxn_vnl_per_seq = 3L, digits = 4L) { aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) diff --git a/R/ms2frames.R b/R/ms2frames.R index 5a10d85..14730ae 100644 --- a/R/ms2frames.R +++ b/R/ms2frames.R @@ -3,24 +3,61 @@ #' @param mgf_path The path to MGF files #' @param n_modules The number of modules (\code{length(aa_masses_all)}) or one #' @param .path_bin The path to binned theoretical masses +#' @param ppm_ms1_bin The tolerance in precursor mass error after mass binning. #' @param by_modules Logical; if TRUE, results are saved with one mgf to one #' theo module. At FALSE, results are saved with one mgf paired to all theo #' modules -pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE) +#' @inheritParams ms2match +#' @inheritParams matchMS +pair_mgftheo <- function (mgf_path, n_modules, .path_bin, by_modules = TRUE, + reframe_mgfs = FALSE, min_mass = 200L, + ppm_ms1_bin = 10L, first_search = FALSE) { message("Pairing experimental and theoretical data.") + tempfiles <- if (by_modules) + list.files(mgf_path, pattern = "^expttheo_", full.names = TRUE) + else + list.files(mgf_path, pattern = "^mgftheo_", full.names = TRUE) + + if (length(tempfiles)) + unlink(tempfiles) + # MGFs (in data frame) split by frame indexes mgf_files <- list.files(mgf_path, pattern = "^mgf_queries_\\d+\\.rds$", full.names = TRUE) mgf_frames <- lapply(mgf_files, qs::qread) + + # for MGF calibrations + if (first_search) { + mgf_frames <- lapply(mgf_frames, function (x) { + min_mgfmass <- min(x$ms1_mass, na.rm = TRUE) + max_mgfmass <- max(x$ms1_mass, na.rm = TRUE) + oks_min <- with(x, ms1_mass <= min_mgfmass + 10L) + oks_max <- with(x, ms1_mass >= max_mgfmass - 
10L) + + mgfa <- x[oks_max, ] + mgfb <- x[oks_min, ] + mgfc <- x[!(oks_max | oks_min), ] + rows <- (1:nrow(mgfc)) %% 10L == 1L + dplyr::bind_rows(mgfa, mgfc[rows, ], mgfb) + }) + } + mgf_frames <- dplyr::bind_rows(mgf_frames) + + if (reframe_mgfs) { + mgf_frames[["frame"]] <- + find_ms1_interval(mgf_frames[["ms1_mass"]], from = min_mass, + ppm = ppm_ms1_bin) + } + mgf_frames <- dplyr::group_by(mgf_frames, frame) mgf_frames <- dplyr::group_split(mgf_frames) fr_names <- lapply(mgf_frames, function (x) x[["frame"]][1]) names(mgf_frames) <- unlist(fr_names, recursive = FALSE, use.names = FALSE) - # into chunks: each chunk has multiple frames: each frame multiple precursors + # -> chunks: each chunk has multiple frames: each frame multiple precursors ranges <- seq_along(mgf_frames) n_chunks <- if (n_modules == 1L || by_modules) @@ -1526,20 +1563,20 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, frame <- mgfs_cr[["frame"]][1] bfi <- 1L - theos_bf_ms1 <- theopeps[[bfi]] - theopeps_bf_ms1 <- theos_bf_ms1[["pep_seq"]] - theomasses_bf_ms1 <- theos_bf_ms1[["mass"]] + thbf <- theopeps[[bfi]] + thbf_peps <- thbf[["pep_seq"]] + thbf_masses <- thbf[["mass"]] cri <- bfi + 1L - theos_cr_ms1 <- theopeps[[cri]] - theopeps_cr_ms1 <- theos_cr_ms1[["pep_seq"]] - theomasses_cr_ms1 <- theos_cr_ms1[["mass"]] + thcr <- theopeps[[cri]] + thcr_peps <- thcr[["pep_seq"]] + thcr_masses <- thcr[["mass"]] # generate both target and decoy MS2 - theos_bf_ms2 <- mapply( + thbf_ms2s <- mapply( FUN, - aa_seq = theopeps_bf_ms1, - ms1_mass = theomasses_bf_ms1, + aa_seq = thbf_peps, + ms1_mass = thbf_masses, MoreArgs = list( aa_masses = aa_masses, ms1vmods = ms1vmods, @@ -1563,12 +1600,12 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, ) # temporarily share peptide names between targets and decoys; # later is.na(pep_ivmod) -> decoys -> add "-" to prot_acc -> reverse sequence - names(theos_bf_ms2) <- theopeps_bf_ms1 + names(thbf_ms2s) <- thbf_peps - theos_cr_ms2 <- mapply( + 
thcr_ms2s <- mapply( FUN, - aa_seq = theopeps_cr_ms1, - ms1_mass = theomasses_cr_ms1, + aa_seq = thcr_peps, + ms1_mass = thcr_masses, MoreArgs = list( aa_masses = aa_masses, ms1vmods = ms1vmods, @@ -1591,11 +1628,11 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, SIMPLIFY = FALSE, USE.NAMES = FALSE ) - names(theos_cr_ms2) <- theopeps_cr_ms1 + names(thcr_ms2s) <- thcr_peps ## --- iteration --- for (i in seq_len(len)) { - exptmasses_ms1 <- mgfs_cr$ms1_mass + exptmasses_ms1 <- mgfs_cr$ms1_mass exptmoverzs_ms2 <- mgfs_cr$ms2_moverz ### Slower to subset + passed as argument @@ -1606,14 +1643,14 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, afi <- cri + 1L - theos_af_ms1 <- theopeps[[afi]] - theopeps_af_ms1 <- theos_af_ms1[["pep_seq"]] - theomasses_af_ms1 <- theos_af_ms1[["mass"]] + thaf <- theopeps[[afi]] + thaf_peps <- thaf[["pep_seq"]] + thaf_masses <- thaf[["mass"]] - theos_af_ms2 <- mapply( + thaf_ms2s <- mapply( FUN, - aa_seq = theopeps_af_ms1, - ms1_mass = theomasses_af_ms1, + aa_seq = thaf_peps, + ms1_mass = thaf_masses, MoreArgs = list( aa_masses = aa_masses, ms1vmods = ms1vmods, @@ -1635,7 +1672,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, SIMPLIFY = FALSE, USE.NAMES = FALSE ) - names(theos_af_ms2) <- theopeps_af_ms1 + names(thaf_ms2s) <- thaf_peps # each `out` for the results of multiple mgfs in one frame out[[i]] <- mapply( @@ -1644,8 +1681,8 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, expt_moverz_ms2 = exptmoverzs_ms2, MoreArgs = list( pep_mod_groups = pep_mod_group, - theomasses_ms1 = c(theomasses_bf_ms1, theomasses_cr_ms1, theomasses_af_ms1), - theomasses_ms2 = c(theos_bf_ms2, theos_cr_ms2, theos_af_ms2), + theomasses_ms1 = c(thbf_masses, thcr_masses, thaf_masses), + theomasses_ms2 = c(thbf_ms2s, thcr_ms2s, thaf_ms2s), minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2, @@ -1666,29 +1703,29 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, if (isTRUE(new_frame == 
(frame + 1L))) { cri <- cri + 1L - theos_bf_ms1 <- theos_cr_ms1 - theomasses_bf_ms1 <- theomasses_cr_ms1 - theos_bf_ms2 <- theos_cr_ms2 + thbf <- thcr + thbf_masses <- thcr_masses + thbf_ms2s <- thcr_ms2s - theos_cr_ms1 <- theos_af_ms1 - theomasses_cr_ms1 <- theomasses_af_ms1 - theos_cr_ms2 <- theos_af_ms2 + thcr <- thaf + thcr_masses <- thaf_masses + thcr_ms2s <- thaf_ms2s } else if (isTRUE(new_frame == (frame + 2L))) { cri <- cri + 2L - theos_bf_ms1 <- theos_af_ms1 - theomasses_bf_ms1 <- theomasses_af_ms1 - theos_bf_ms2 <- theos_af_ms2 + thbf <- thaf + thbf_masses <- thaf_masses + thbf_ms2s <- thaf_ms2s - theos_cr_ms1 <- theopeps[[cri]] - theopeps_cr_ms1 <- theos_cr_ms1[["pep_seq"]] - theomasses_cr_ms1 <- theos_cr_ms1[["mass"]] + thcr <- theopeps[[cri]] + thcr_peps <- thcr[["pep_seq"]] + thcr_masses <- thcr[["mass"]] - theos_cr_ms2 <- mapply( + thcr_ms2s <- mapply( FUN, - aa_seq = theopeps_cr_ms1, - ms1_mass = theomasses_cr_ms1, + aa_seq = thcr_peps, + ms1_mass = thcr_masses, MoreArgs = list( aa_masses = aa_masses, ms1vmods = ms1vmods, @@ -1710,24 +1747,24 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, SIMPLIFY = FALSE, USE.NAMES = FALSE ) - names(theos_cr_ms2) <- theopeps_cr_ms1 + names(thcr_ms2s) <- thcr_peps } else { cri <- cri + 3L bfi <- cri - 1L - theos_bf_ms1 <- theopeps[[bfi]] - theopeps_bf_ms1 <- theos_bf_ms1[["pep_seq"]] - theomasses_bf_ms1 <- theos_bf_ms1[["mass"]] + thbf <- theopeps[[bfi]] + thbf_peps <- thbf[["pep_seq"]] + thbf_masses <- thbf[["mass"]] - theos_cr_ms1 <- theopeps[[cri]] - theopeps_cr_ms1 <- theos_cr_ms1[["pep_seq"]] - theomasses_cr_ms1 <- theos_cr_ms1[["mass"]] + thcr <- theopeps[[cri]] + thcr_peps <- thcr[["pep_seq"]] + thcr_masses <- thcr[["mass"]] - theos_bf_ms2 <- mapply( + thbf_ms2s <- mapply( FUN, - aa_seq = theopeps_bf_ms1, - ms1_mass = theomasses_bf_ms1, + aa_seq = thbf_peps, + ms1_mass = thbf_masses, MoreArgs = list( aa_masses = aa_masses, ms1vmods = ms1vmods, @@ -1749,12 +1786,12 @@ frames_adv <- function 
(mgf_frames = NULL, theopeps = NULL, SIMPLIFY = FALSE, USE.NAMES = FALSE ) - names(theos_bf_ms2) <- theopeps_bf_ms1 + names(thbf_ms2s) <- thbf_peps - theos_cr_ms2 <- mapply( + thcr_ms2s <- mapply( FUN, - aa_seq = theopeps_cr_ms1, - ms1_mass = theomasses_cr_ms1, + aa_seq = thcr_peps, + ms1_mass = thcr_masses, MoreArgs = list( aa_masses = aa_masses, ms1vmods = ms1vmods, @@ -1776,7 +1813,7 @@ frames_adv <- function (mgf_frames = NULL, theopeps = NULL, SIMPLIFY = FALSE, USE.NAMES = FALSE ) - names(theos_cr_ms2) <- theopeps_cr_ms1 + names(thcr_ms2s) <- thcr_peps } frame <- new_frame diff --git a/R/msmsmatches.R b/R/msmsmatches.R index 8c93880..9ae0850 100644 --- a/R/msmsmatches.R +++ b/R/msmsmatches.R @@ -249,13 +249,12 @@ #' default is 20. #' @param ppm_ms2 A positive integer; the mass tolerance of MS2 species. The #' default is 20. -#' @param calib_ms1mass Temporarily diabled. Logical; if TRUE, calibrates -#' precursor masses. +#' @param calib_ms1mass Logical; if TRUE, calibrates precursor masses. #' @param ppm_reporters A positive integer; the mass tolerance of MS2 reporter #' ions. The default is 10. #' @param ppm_ms1calib A positive integer; the mass tolerance of MS1 species for -#' precursor mass calibration. The default is 10. The argument has no effect -#' at \code{calib_ms1mass = FALSE}. +#' precursor mass calibration. The argument has no effect at +#' \code{calib_ms1mass = FALSE}. #' @param quant A character string; the quantitation method. The default is #' "none". Additional choices include \code{tmt6, tmt10, tmt11, tmt16 and #' tmt18}. For other multiplicities of \code{tmt}, use the compatible higher @@ -409,6 +408,10 @@ #' suggested. Occasionally experimenters may remove the file folder for disk #' space or under infrequent events of modified framework incurred by the #' developer. +#' @param by_modules Logical. Experimenting. At the TRUE default, searches MS +#' data by individual modules of combinatorial fixed and variable +#' modifications. 
If FALSE, search all modules together. The latter would +#' probably need more than 32G RAM if the number of modules is over 96. #' @param digits A non-negative integer; the number of decimal places to be #' used. The default is 4. #' @param ... Not currently used. @@ -706,7 +709,7 @@ matchMS <- function (out_path = "~/mzion/outs", min_scan_num = 1L, max_scan_num = .Machine$integer.max, min_ret_time = 0, max_ret_time = Inf, calib_ms1mass = FALSE, - ppm_ms1calib = 10L, + ppm_ms1calib = 20L, add_ms2theos = FALSE, add_ms2theos2 = FALSE, add_ms2moverzs = FALSE, add_ms2ints = FALSE, @@ -722,6 +725,7 @@ matchMS <- function (out_path = "~/mzion/outs", svm_costs = c(.1, .3, 1, 3, 10), svm_def_cost = 1, svm_iters = 10L, + by_modules = TRUE, digits = 4L, ...) { options(digits = 9L) @@ -736,10 +740,6 @@ matchMS <- function (out_path = "~/mzion/outs", add = TRUE ) - ## Experimenting - by_modules <- TRUE - ## - message("Started at: ", Sys.time()) this_call <- match.call() @@ -968,7 +968,6 @@ matchMS <- function (out_path = "~/mzion/outs", } # fdr_group - # for future supports of character strings or integers (mod_groups) # fdr_group <- check_fdr_group(fdr_group, eval(this_fml[["fdr_group"]])) oks <- eval(this_fml[["fdr_group"]]) @@ -1182,20 +1181,32 @@ matchMS <- function (out_path = "~/mzion/outs", } ## Bin theoretical peptides - ppm_precsr <- if (calib_ms1mass) ppm_ms1calib else ppm_ms1 bypass_bin_ms1 <- dots$bypass_bin_ms1 if (is.null(bypass_bin_ms1)) bypass_bin_ms1 <- FALSE + + reframe_mgfs <- calib_ms1mass && ppm_ms1calib != ppm_ms1 if (!bypass_bin_ms1) { bin_ms1masses(res = res, min_mass = min_mass, max_mass = max_mass, - ppm_ms1 = ppm_precsr, + ppm_ms1 = ppm_ms1, use_ms1_cache = use_ms1_cache, .path_cache = .path_cache, .path_ms1masses = .path_ms1masses, out_path = out_path) + if (reframe_mgfs) { + bin_ms1masses(res = res, + min_mass = min_mass, + max_mass = max_mass, + ppm_ms1 = ppm_ms1calib, + use_ms1_cache = use_ms1_cache, + .path_cache = .path_cache, + .path_ms1masses
= .path_ms1masses, + out_path = out_path) + } + try(rm(list = "res"), silent = TRUE) gc() } @@ -1204,7 +1215,7 @@ matchMS <- function (out_path = "~/mzion/outs", bypass_mgf <- dots$bypass_mgf if (is.null(bypass_mgf)) bypass_mgf <- FALSE - if (!bypass_mgf) { + if (!bypass_mgf) load_mgfs(out_path = out_path, mgf_path = mgf_path, min_mass = min_mass, @@ -1218,7 +1229,7 @@ matchMS <- function (out_path = "~/mzion/outs", max_scan_num = max_scan_num, min_ret_time = min_ret_time, max_ret_time = max_ret_time, - ppm_ms1 = ppm_precsr, + ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2, mgf_cutmzs = mgf_cutmzs, mgf_cutpercs = mgf_cutpercs, @@ -1229,7 +1240,6 @@ matchMS <- function (out_path = "~/mzion/outs", index_mgf_ms2 = index_mgf_ms2, quant = quant, digits = digits) - } ## MSMS matches bypass_ms2match <- dots$bypass_ms2match @@ -1254,16 +1264,19 @@ matchMS <- function (out_path = "~/mzion/outs", mod_indexes <- NULL } - if (FALSE || calib_ms1mass) + if (calib_ms1mass) { calib_mgf(mgf_path = mgf_path, aa_masses_all = aa_masses_all[1], # base - out_path = out_path, mod_indexes = mod_indexes, + out_path = out_path, + mod_indexes = mod_indexes[names(mod_indexes) %in% fixedmods], type_ms2ions = type_ms2ions, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod, maxn_fnl_per_seq = maxn_fnl_per_seq, maxn_vnl_per_seq = maxn_vnl_per_seq, maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, - minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, ppm_ms1calib = ppm_ms1calib, + minn_ms2 = minn_ms2, + ppm_ms1 = ppm_ms1calib, + reframe_mgfs = reframe_mgfs, ppm_ms2 = ppm_ms2, min_mass = min_mass, max_mass = max_mass, min_ms2mass = min_ms2mass, quant = quant, ppm_reporters = ppm_reporters, index_mgf_ms2 = index_mgf_ms2, @@ -1274,6 +1287,7 @@ matchMS <- function (out_path = "~/mzion/outs", maxn_vmods_setscombi = maxn_vmods_setscombi, min_len = min_len, max_len = max_len, max_miss = max_miss, knots = 50L, digits = digits) + } if (!bypass_ms2match) { if (min_ms2mass < 5L) @@ 
-1290,17 +1304,14 @@ matchMS <- function (out_path = "~/mzion/outs", maxn_vnl_per_seq = maxn_vnl_per_seq, maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, minn_ms2 = minn_ms2, - ppm_ms1 = ppm_ms1, - ppm_ms1calib = ppm_ms1calib, - + reframe_mgfs = FALSE, ppm_ms2 = ppm_ms2, min_mass = min_mass, max_mass = max_mass, min_ms2mass = min_ms2mass, quant = quant, ppm_reporters = ppm_reporters, - calib_ms1mass = calib_ms1mass, index_mgf_ms2 = index_mgf_ms2, by_modules = by_modules, @@ -1381,6 +1392,7 @@ matchMS <- function (out_path = "~/mzion/outs", add_ms2theos2 = add_ms2theos2, add_ms2moverzs = add_ms2moverzs, add_ms2ints = add_ms2ints, + by_modules = by_modules, index_mgf_ms2 = index_mgf_ms2) ## Peptide FDR diff --git a/R/msmsmatches2.R b/R/msmsmatches2.R index a14a6b1..f8083c1 100644 --- a/R/msmsmatches2.R +++ b/R/msmsmatches2.R @@ -7,6 +7,10 @@ #' fixed and variable modifications. #' @param mod_indexes Integer; the indexes of fixed and/or variable #' modifications. +#' @param reframe_mgfs Logical; if TRUE, recalculates the frame indexes of MGFs. +#' @param first_search Logical; is the first search (for MGF mass calibration) +#' or not. +#' @param .savecall Logical; if TRUE, saves the current call. 
#' @inheritParams matchMS #' @inheritParams load_mgfs #' @inheritParams frames_adv @@ -16,10 +20,10 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, mod_indexes, type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 64L, maxn_vnl_per_seq = 64L, maxn_vmods_sitescombi_per_pep = 64L, - minn_ms2 = 6L, ppm_ms1 = 20L, ppm_ms1calib = 10L, - ppm_ms2 = 20L, min_mass = 200L, max_mass = 4500L, - min_ms2mass = 115L, quant = "none", ppm_reporters = 10L, - calib_ms1mass = FALSE, by_modules = TRUE, + minn_ms2 = 6L, ppm_ms1 = 20L, ppm_ms2 = 20L, + min_mass = 200L, max_mass = 4500L, min_ms2mass = 115L, + quant = "none", ppm_reporters = 10L, + by_modules = TRUE, reframe_mgfs = FALSE, # dummies fasta, acc_type, acc_pattern, @@ -28,8 +32,8 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, maxn_fasta_seqs, maxn_vmods_setscombi, min_len, max_len, max_miss, - index_mgf_ms2 = FALSE, - digits = 4L) + index_mgf_ms2 = FALSE, first_search = FALSE, + .savecall = TRUE, digits = 4L) { options(digits = 9L) @@ -42,15 +46,15 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, ) # Check cached - fun <- as.character(match.call()[[1]]) + fun <- as.character(match.call()[[1]]) fun_env <- environment() fml_nms <- names(formals(fun)) - file_aa <- file.path(out_path, "aa_masses_all.rds") + faa <- file.path(out_path, "aa_masses_all.rds") # (OK as `argument` not for users) # min_mass and max_mass only for calib_ms1mass, not to be changed by users - # args_except <- c("quant", "min_mass", "max_mass", "calib_ms1mass", "by_modules") - args_except <- c("by_modules") + # args_except <- c("quant", "min_mass", "max_mass", "by_modules") + args_except <- c("by_modules", "first_search") fml_incl <- fml_nms[!fml_nms %in% args_except] cache_pars <- find_callarg_vals(time = NULL, path = file.path(out_path, "Calls"), @@ -67,8 +71,8 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, if (length(fions)) { message("Found ", length(fions), " cached ion 
matches.") - if (!file.exists(file_aa)) - qs::qsave(aa_masses_all, file_aa) + if (!file.exists(faa)) + qs::qsave(aa_masses_all, faa) .savecall <- FALSE @@ -78,10 +82,12 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, rm(list = c("args_except", "cache_pars", "call_pars")) - delete_files(out_path, ignores = c("\\.[Rr]$", "\\.(mgf|MGF)$", "\\.xlsx$", - "\\.xls$", "\\.csv$", "\\.txt$", - "^mgf$", "^mgfs$", "Calls")) - + delete_files( + out_path, + ignores = c("\\.[Rr]$", "\\.(mgf|MGF)$", "\\.xlsx$", + "\\.xls$", "\\.csv$", "\\.txt$", + "^mgf$", "^mgfs$", "Calls")) + # pairs expts and theos .path_bin <- get(".path_bin", envir = .GlobalEnv, inherits = FALSE) @@ -97,8 +103,17 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, nfiles_a <- 0L } + # For three-frame searches + # (matches of secondary ions may use `outer` products and no adjustments) + ppm_ms1_bin <- calc_threeframe_ppm(ppm_ms1) + ppm_ms2_bin <- calc_threeframe_ppm(ppm_ms2) + + # if (first_search) subset mgf + pair_mgftheo(mgf_path = mgf_path, n_modules = length(aa_masses_all), - .path_bin = .path_bin, by_modules = by_modules) + .path_bin = .path_bin, by_modules = by_modules, + reframe_mgfs = reframe_mgfs, min_mass = min_mass, + ppm_ms1_bin = ppm_ms1_bin, first_search = first_search) rm(list = c("files_a", "files_b", "nfiles_a", "nfiles_b")) @@ -126,13 +141,6 @@ ms2match <- function (mgf_path, aa_masses_all, out_path, maxn_sites_per_vmod = maxn_sites_per_vmod) ms2vmods_all <- lapply(ms1vmods_all, lapply, make_ms2vmods) - # For three-frame searches - # (matches of secondary ions may use `outer` products and no adjustments) - ppm_precsr <- if (calib_ms1mass) ppm_ms1calib else ppm_ms1 - ppm_ms1_bin <- calc_threeframe_ppm(ppm_precsr) - ppm_ms2_bin <- calc_threeframe_ppm(ppm_ms2) - rm(list = "ppm_precsr") - # Searches df0 <- tibble::tibble(scan_title = integer(), ms1_moverz = numeric(), ms1_mass = numeric(), ms1_int = numeric(), @@ -166,10 +174,8 @@ ms2match <- function (mgf_path, 
aa_masses_all, out_path, df0 = df0, digits = digits) - qs::qsave(aa_masses_all, file_aa) + qs::qsave(aa_masses_all, faa) - .savecall <- TRUE - invisible(NULL) } @@ -221,26 +227,28 @@ reverse_seqs <- function (seqs) #' MGF precursor mass calibration. -#' +#' #' \code{ppm_ms1} only for the calculation of frame indexes of precursors. -#' +#' +#' @param aa_masses_all List(1); The first list of all amino-acid look-ups. +#' @param mod_indexes Integer; the indexes of fixed and/or variable +#' modifications +#' @param reframe_mgfs Logical; if TRUE, recalculates the frame indexes of MGFs #' @param knots The number of knots for spline fits. #' @inheritParams matchMS calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, mod_indexes = NULL, type_ms2ions = "by", maxn_vmods_per_pep = 5L,maxn_sites_per_vmod = 3L, maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, - maxn_vmods_sitescombi_per_pep = 64L, - minn_ms2 = 6L, ppm_ms1 = 20L, ppm_ms1calib = 20L, - ppm_ms2 = 20L, min_mass = 200L, max_mass = 4500L, - min_ms2mass = 115L, - quant = c("none", "tmt6", "tmt10", "tmt11", "tmt16", "tmt18"), + maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, + ppm_ms1 = 20L, reframe_mgfs = TRUE, + ppm_ms2 = 20L, min_mass = 200L, + max_mass = 4500L, min_ms2mass = 115L, quant = "none", ppm_reporters = 10L, index_mgf_ms2 = FALSE, - by_modules = TRUE, - fasta = NULL, acc_type = NULL, acc_pattern = NULL, - topn_ms2ions = 100L, fixedmods = NULL, varmods = NULL, - enzyme = "trypsin_p", maxn_fasta_seqs = 200000L, - maxn_vmods_setscombi = 512L, + by_modules = TRUE, fasta = NULL, acc_type = NULL, + acc_pattern = NULL, topn_ms2ions = 100L, + fixedmods = NULL, varmods = NULL, enzyme = "trypsin_p", + maxn_fasta_seqs = 200000L, maxn_vmods_setscombi = 512L, min_len = 7L, max_len = 40L, max_miss = 2L, knots = 50L, digits = 4L) { @@ -252,10 +260,13 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, fun <- as.character(match.call()[[1]]) fun_env <- environment() - - 
args_except <- c("out_path") args <- names(formals(fun)) - args_must <- args[!args %in% args_except] + args_except <- NULL + + if (length(args_except)) + args_must <- args[!args %in% args_except] + else + args_must <- args cache_pars <- find_callarg_vals( time = NULL, @@ -264,21 +275,47 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, args = args_must) cache_pars <- cache_pars[sort(names(cache_pars))] - call_pars <- mget(args_must, envir = fun_env, inherits = FALSE) - call_pars <- call_pars[sort(names(call_pars))] - ok_pars <- identical(call_pars, cache_pars) - - if (identical(cache_pars, call_pars) && - check_ms1calib(out_path, calib_ms1mass = TRUE)) { - message("Mass calibration performed previously. \n", - "To recalibrate, delete `Calls/workflow_info.rds`.") + call_pars <- mget(args_must, envir = fun_env, inherits = FALSE) + call_pars <- call_pars[sort(names(call_pars))] + ok_pars <- identical(call_pars, cache_pars) + + if (ok_pars) { + message("Mass calibration performed previously. 
", + "Delete `", paste0(fun, ".rda"), "` to recalibrate.") .savecall <- FALSE return(NULL) } + # may need to delete mgf_queries_[...].rds when changing, e.g., from + # ppm_ms1 = 20 to 10; or save a copy of the original mgf_queries + ## the first search + tempdir <- file.path(out_path, "temp") + pat_th <- if (by_modules) "^expttheo_\\d+.*\\.rds$" else "^mgftheo_\\d+.*\\.rds$" + pat_im <- "^ion_matches_\\d+.*\\.rds$" + fs_th <- list.files(mgf_path, pattern = pat_th, full.names = TRUE) + fs_im <- list.files(tempdir, pattern = pat_im, full.names = TRUE) + file.remove(fs_th, recursive = TRUE) + file.remove(fs_im, recursive = TRUE) + + if (!dir.exists(tempdir)) + create_dir(tempdir) + + fi_aa <- file.path(out_path, "aa_masses_all.rds") + fi_mi <- file.path(out_path, "mod_indexes.txt") + + if (!file.exists(fi_aa)) + stop("Amino-acid look-ups not found: ", fi_aa) + if (!file.exists(fi_mi)) + stop("Amino-acid look-ups not found: ", fi_mi) + + fi_aa2 <- file.path(out_path, "Calls", "aa_masses_all.rds") + fi_mi2 <- file.path(out_path, "Calls", "mod_indexes.txt") + file.rename(fi_aa, fi_aa2) + file.rename(fi_mi, fi_mi2) + ms2match(mgf_path = mgf_path, - aa_masses_all = aa_masses_all[1], + aa_masses_all = aa_masses_all, out_path = out_path, mod_indexes = mod_indexes, type_ms2ions = type_ms2ions, @@ -289,18 +326,15 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, minn_ms2 = minn_ms2, ppm_ms1 = ppm_ms1, - ppm_ms1calib = ppm_ms1calib, ppm_ms2 = ppm_ms2, min_mass = min_mass, max_mass = max_mass, min_ms2mass = min_ms2mass, quant = quant, ppm_reporters = ppm_reporters, - calib_ms1mass = FALSE, + reframe_mgfs = reframe_mgfs, index_mgf_ms2 = index_mgf_ms2, by_modules = by_modules, - - # dummy for argument matching fasta = fasta, acc_type = acc_type, acc_pattern = acc_pattern, @@ -313,71 +347,84 @@ calib_mgf <- function (mgf_path = NULL, aa_masses_all = NULL,out_path = NULL, min_len = min_len, 
max_len = max_len, max_miss = max_miss, + first_search = TRUE, + .savecall = FALSE, digits = digits) + file.rename(fi_aa2, fi_aa) + file.rename(fi_mi2, fi_mi) + ## mass calibration - files_mgf <- list.files(mgf_path, "^mgf_queries.*\\.rds$") + fs_mgf <- list.files(mgf_path, "^mgf_queries.*\\.rds$") + fi_ion <- file.path(out_path, "temp", "ion_matches_1.rds") - if (!length(files_mgf)) { - stop("No `mgf_queries` files found.") - return(NULL) - } + if (!length(fs_mgf)) + stop("No `mgf_queries` files found for calibrations.") + if (!file.exists(fi_ion)) + stop("No `ion_matches` files found for calibrations.") - ## Calibration - df <- qs::qread(file.path(out_path, "temp", "ion_matches_1.rds")) - dfs <- split(df, df$raw_file) - ord <- gsub("^mgf_queries_(\\d+)\\.rds", "\\1", files_mgf) + df <- qs::qread(fi_ion) + + if (!"raw_file" %in% names(df)) + stop("Column not found in search results: `raw_file`") + + dfs <- split(df, df[["raw_file"]]) + ord <- sort(as.integer(gsub("^mgf_queries_(\\d+)\\.rds", "\\1", fs_mgf))) dfs <- dfs[ord] + fs_mgf <- fs_mgf[ord] rm(list = c("df", "ord")) len <- length(dfs) n_cores <- min(len, detect_cores(32L)) if (len <= 2L) { - mapply(calib_ms1, files_mgf, dfs, + mapply(calib_ms1, fs_mgf, dfs, MoreArgs = list( mgf_path = mgf_path, out_path = out_path, ppm_ms1 = ppm_ms1, min_mass = min_mass, max_mass = max_mass, knots = knots, - digits = digits - ), SIMPLIFY = FALSE, USE.NAMES = FALSE) + digits = digits), + SIMPLIFY = FALSE, + USE.NAMES = FALSE) } else { cl <- parallel::makeCluster(getOption("cl.cores", n_cores)) parallel::clusterExport(cl, "calib_ms1", envir = environment(mzion::matchMS)) parallel::clusterMap( - cl, calib_ms1, files_mgf, dfs, + cl, calib_ms1, fs_mgf, dfs, MoreArgs = list( mgf_path = mgf_path, out_path = out_path, ppm_ms1 = ppm_ms1, min_mass = min_mass, max_mass = max_mass, knots = knots, - digits = digits - ), SIMPLIFY = FALSE, USE.NAMES = FALSE) + digits = digits), + SIMPLIFY = FALSE, + USE.NAMES = FALSE) 
parallel::stopCluster(cl) } - qs::qsave(c(`passed_ms1calib` = TRUE), - file.path(out_path, "Calls", "workflow_info.rds"), - preset = "fast") - message("Completed precursor mass calibration.\n") + + fs_th <- list.files(mgf_path, pattern = pat_th, full.names = TRUE) + fs_im <- list.files(tempdir, pattern = pat_im, full.names = TRUE) + file.remove(fs_th, recursive = TRUE) + file.remove(fs_im, recursive = TRUE) .savecall <- TRUE - + invisible(NULL) } -#' Calibration precursor masses by individual RAW_Files. +#' Calibrates precursor masses (by individual RAW_Files) #' -#' @param file_mgf An MGF file name +#' @param filename An MGF file name #' @param df A data frame of \code{ion_matches_1.rds} #' @inheritParams calib_mgf -calib_ms1 <- function (file_mgf, df = NULL, mgf_path = NULL, out_path = NULL, +calib_ms1 <- function (filename, df = NULL, mgf_path = NULL, out_path = NULL, ppm_ms1 = 20L, min_mass = 200L, max_mass = 4500L, knots = 50L, digits = 4L) { - mgfs <- qs::qread(file.path(mgf_path, file_mgf)) + mgfs <- qs::qread(file.path(mgf_path, filename)) # subsets by minn_ms2 and ms1_int if (FALSE) { @@ -400,15 +447,9 @@ calib_ms1 <- function (file_mgf, df = NULL, mgf_path = NULL, out_path = NULL, }) theo_ms1 <- .Internal(unlist(theo_ms1, recursive = FALSE, use.names = FALSE)) - expt_ms1 <- df[["pep_exp_mr"]] - diff_ms1 <- (expt_ms1 - theo_ms1)/theo_ms1 * 1E6 + diff_ms1 <- (df[["pep_exp_mr"]] - theo_ms1)/theo_ms1 * 1E6 ret_time <- df[["pep_ret_range"]] - ppm_err <- floor(median(abs(diff_ms1), na.rm = TRUE)) - ppm_ms1calib <- max(ppm_ms1 - ppm_err * 2L, 4L) - ppm_ms1_bin <- calc_threeframe_ppm(ppm_ms1calib) - - qs::qsave(c(`ppm_ms1_bf` = ppm_ms1, `ppm_ms1_af` = ppm_ms1calib), - file.path(mgf_path, paste0("ppm_ms1calib_", file_mgf))) + ppm_ms1_bin <- calc_threeframe_ppm(ppm_ms1) fit_ns <- tryCatch( lm(diff_ms1 ~ splines::ns(ret_time, knots)), @@ -421,12 +462,7 @@ calib_ms1 <- function (file_mgf, df = NULL, mgf_path = NULL, out_path = NULL, res_ns <- if (class(fit_ns) == 
"lm") sum(resid(fit_ns)^2, na.rm = TRUE) else Inf res_bs <- if (class(fit_bs) == "lm") sum(resid(fit_bs)^2, na.rm = TRUE) else Inf fit <- if (res_ns <= res_bs) fit_ns else fit_bs - - ## Update df - pred <- predict.lm(fit, newdata = data.frame(ret_time = ret_time)) / 1E6 - df[["pep_exp_mr"]] <- round(df[["pep_exp_mr"]] * (1 - pred), digits = digits) - df[["pep_frame"]] <- find_ms1_interval(df[["pep_frame"]], from = min_mass, ppm = ppm_ms1_bin) - + # (keeps the original df$ms1_mass -> can later infer mass deltas) # charges <- get_ms1charges(df[["ms1_charge"]]) # df[["ms1_moverz"]] <- (df[["ms1_mass"]] + 1.00727647 * charges)/charges @@ -452,19 +488,19 @@ calib_ms1 <- function (file_mgf, df = NULL, mgf_path = NULL, out_path = NULL, mgfs[["ms1_mass"]][!oks_gr] <- ms1_gr - ms1_gr * err_gr } - ## Update MGF + ## update MGF mgfs <- mgfs %>% dplyr::arrange(ms1_mass) %>% dplyr::filter(ms1_mass >= min_mass, ms1_mass <= max_mass) %>% - dplyr::mutate(frame = find_ms1_interval(ms1_mass, from = min_mass, - ppm = ppm_ms1_bin)) - + dplyr::mutate( + frame = find_ms1_interval(ms1_mass, from = min_mass, ppm = ppm_ms1_bin)) + # charges <- get_ms1charges(mgfs[["ms1_charge"]]) # mgfs[["ms1_moverz"]] <- (mgfs[["ms1_mass"]] + 1.00727647 * charges)/charges .savecall <- TRUE - qs::qsave(mgfs, file.path(mgf_path, file_mgf), preset = "fast") + qs::qsave(mgfs, file.path(mgf_path, filename), preset = "fast") } diff --git a/R/scores.R b/R/scores.R index e312a65..cbf4c16 100644 --- a/R/scores.R +++ b/R/scores.R @@ -809,11 +809,17 @@ split_im <- function (files, sc_path, tempdir, max_size = 10000000) #' Order fractions #' -#' @param type The type of files -#' @param tempdir A temporary directory containing the files -order_fracs <- function (type = "list_table", tempdir) +#' @param type The type of files. +#' @param tempdir A temporary directory containing the files. +#' @param by_modules Logical; if TRUE, performs searches by modules. 
+order_fracs <- function (type = "list_table", tempdir, by_modules = TRUE) { - files <- list.files(tempdir, pattern = paste0("^", type, "_\\d+_\\d+.*")) + files <- if (by_modules) + list.files(tempdir, pattern = paste0("^", type, "_\\d+_\\d+.*")) + else + list.files(tempdir, pattern = paste0("^", type, "_\\d+(_){0,1}\\d*.*")) + + # all NA's if by_modules = FALSE idxes <- as.integer(gsub(paste0("^", type, "_(\\d+).*"), "\\1", files)) fracs <- as.integer(gsub(paste0("^", type, "_\\d+_(\\d+).*"), "\\1", files)) @@ -899,7 +905,6 @@ find_targets <- function (out_path, pattern = "^ion_matches_") #' @param pep_fmod_all Attributes of \code{pep_fmod} from \code{aa_masses_all} #' @param pep_vmod_all Attributes of \code{pep_vmod} from \code{aa_masses_all} #' @param d2 Bin width in ppm divided by 1E6 -#' @param n_cores The number of CPU cores #' @inheritParams matchMS #' @inheritParams calc_pepscores calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all, @@ -1054,7 +1059,7 @@ calcpepsc <- function (file, im_path, pep_fmod_all, pep_vmod_all, hadd_primatches <- function (out_path = NULL, add_ms2theos = FALSE, add_ms2theos2 = FALSE, add_ms2moverzs = FALSE, add_ms2ints = FALSE, - index_mgf_ms2 = FALSE) + by_modules = TRUE, index_mgf_ms2 = FALSE) { # the same as those in calcpepsc cols_sc <- c("pep_seq", "pep_n_ms2", "pep_scan_title", "pep_exp_mz", "pep_exp_mr", @@ -1090,7 +1095,7 @@ hadd_primatches <- function (out_path = NULL, index_mgf_ms2 = index_mgf_ms2) parallel::stopCluster(cl) - ms_files <- order_fracs(type = "ms2info", tempdir) + ms_files <- order_fracs(type = "ms2info", tempdir, by_modules) mapply(function (fis, idx) { df <- lapply(fis, function (x) qs::qread(file.path(tempdir, x))) @@ -1107,7 +1112,8 @@ hadd_primatches <- function (out_path = NULL, preset = "fast") }, ms_files, names(ms_files)) - lapply(order_fracs("reporters", tempdir), combine_fracs, tempdir, tempdir) + lapply(order_fracs("reporters", tempdir, by_modules), + combine_fracs, tempdir, tempdir) 
message("Completed theoretical MS2 m/z and intensity values: ", Sys.time()) diff --git a/R/utils_engine.R b/R/utils_engine.R index 53326af..b7e207c 100644 --- a/R/utils_engine.R +++ b/R/utils_engine.R @@ -506,7 +506,7 @@ is_equal_sets <- function(x, y) all(x %in% y) && all(y %in% x) #' #' x <- list(`Bar (M)` = c(0, 3)) #' mzion:::expand_grid_rows(x) -expand_grid_rows <- function (..., nmax = 8L, use.names = TRUE) +expand_grid_rows <- function (..., nmax = 3L, use.names = TRUE) { args <- list(...)[[1]] nargs <- length(args) @@ -551,6 +551,56 @@ expand_grid_rows <- function (..., nmax = 8L, use.names = TRUE) } +#' Modified from expand.grid +#' +#' Net yet used. +#' +#' @param nmax The maximum number of combinations allowed. +#' @param ... Lists of data. +expand_grid <- function (..., nmax = 3L) +{ + nargs <- length(args <- list(...)) + + if (!nargs) + return(as.data.frame(list())) + + if (nargs == 1L && is.list(a1 <- args[[1L]])) + nargs <- length(args <- a1) + if (nargs == 0L) + return(as.data.frame(list())) + + cargs <- vector("list", nargs) + iArgs <- seq_len(nargs) + nmc <- paste0("Var", iArgs) + nm <- names(args) + nm <- nmc + names(cargs) <- nmc + + rep.fac <- 1L + d <- lengths(args) + orep <- prod(d) + nmax <- min(nmax, orep) + + if (orep == 0L) { + for (i in iArgs) cargs[[i]] <- args[[i]][FALSE] + } + else { + for (i in iArgs) { + x <- args[[i]] + nx <- length(x) + orep <- orep/nx + + x <- x[rep_len(rep.int(seq_len(nx), rep.int(rep.fac, nx)), nmax)] + cargs[[i]] <- x + rep.fac <- rep.fac * nx + } + } + + rn <- .set_row_names(as.integer(prod(d))) + structure(cargs, class = "data.frame", row.names = rn) +} + + #' Expands grids. #' #' Outputs are vectors corresponding to rows in the the data.frame from the @@ -862,38 +912,6 @@ calc_threeframe_ppm <- function (ppm = 20L, is_three_frame = TRUE, fct_ppm = .5) } -#' Checks the status of a prior execution of precursor mass calibration. 
-#' -#' @inheritParams matchMS -#' @return TRUE if without precursor mass calibration. -check_ms1calib <- function(out_path = NULL, calib_ms1mass = FALSE) -{ - workflow_file <- file.path(out_path, "Calls", "workflow_info.rds") - - passed_ms1calib <- if (calib_ms1mass) { - if (file.exists(workflow_file)) - qs::qread(workflow_file)[["passed_ms1calib"]] - else - FALSE - } - else - TRUE - - if (is.null(passed_ms1calib)) FALSE else passed_ms1calib -} - - -#' Saves the \code{ppm_ms1} before and after calibration. -#' -#' @param ppm_ms1calib The mass error after calibration in ppm. -#' @inheritParams matchMS -save_ms1calib <- function (ppm_ms1, ppm_ms1calib, mgf_path) -{ - info_calib <- c(`ppm_ms1_bf` = ppm_ms1, `ppm_ms1_af` = ppm_ms1calib) - qs::qsave(info_calib, file.path(mgf_path, "ppm_ms1calib.rds")) -} - - #' Gets the MS1 charges. #' #' @param charges A vector of \code{2+, 3+} etc. diff --git a/R/utils_ui.R b/R/utils_ui.R index b0d9b0a..1e1a410 100644 --- a/R/utils_ui.R +++ b/R/utils_ui.R @@ -367,7 +367,8 @@ calc_ms2ionseries <- function (aa_seq, fixedmods, varmods, maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 32L, - digits = 4L) + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, digits = 4L) { options(digits = 9L) @@ -386,8 +387,11 @@ calc_ms2ionseries <- function (aa_seq, fixedmods, varmods, ms <- purrr::map2(peps, aa_masses_all, function (x, y) { pri <- calc_ms2ions(x, ms1_mass, y, mod_indexes, type_ms2ions, - maxn_vmods_per_pep, maxn_sites_per_vmod, - maxn_vmods_sitescombi_per_pep, digits) + maxn_vmods_per_pep = maxn_vmods_per_pep, + maxn_sites_per_vmod = maxn_sites_per_vmod, + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, + maxn_fnl_per_seq = maxn_fnl_per_seq, + maxn_vnl_per_seq = maxn_vnl_per_seq, digits) sec <- lapply(pri, add_seions, type_ms2ions = type_ms2ions, digits = digits) @@ -440,7 +444,9 @@ calc_ms2ionseries <- function (aa_seq, fixedmods, varmods, calc_ms2ions <- function (aa_seq, ms1_mass = NULL, 
aa_masses, mod_indexes = NULL, type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, - maxn_vmods_sitescombi_per_pep = 64L, digits = 4L) + maxn_vmods_sitescombi_per_pep = 64L, + maxn_fnl_per_seq = 3L, maxn_vnl_per_seq = 3L, + digits = 4L) { # tmt6_mass <- 229.162932 # tmtpro_mass <- 304.207146 @@ -462,201 +468,58 @@ calc_ms2ions <- function (aa_seq, ms1_mass = NULL, aa_masses, mod_indexes = NULL aas <- stringr::str_split(aa_seq, "", simplify = TRUE) type <- attr(aa_masses, "type", exact = TRUE) - # (1, 2) "amods- tmod+ vnl- fnl-", "amods- tmod- vnl- fnl-" - if (type %in% c("amods- tmod- vnl- fnl-", "amods- tmod+ vnl- fnl-")) { - ntmod <- attr(aa_masses, "ntmod", exact = TRUE) - ctmod <- attr(aa_masses, "ctmod", exact = TRUE) - - ntmass <- if (length(ntmod)) - aa_masses[names(ntmod)] + 1.00727647 - else - aa_masses["N-term"] - 0.000549 - - ctmass <- if (length(ctmod)) - aa_masses[names(ctmod)] + 2.01510147 - else - aa_masses["C-term"] + 2.01510147 - - ans <- gen_ms2ions_base(aa_seq = aa_seq, ms1_mass = ms1_mass, - aa_masses = aa_masses, - ms1vmods = NULL, ms2vmods = NULL, - ntmod = ntmod, ctmod = ctmod, - ntmass = ntmass, ctmass = ctmass, - amods = NULL, vmods_nl = NULL, fmods_nl = NULL, - mod_indexes = mod_indexes, - type_ms2ions = type_ms2ions, - maxn_vmods_per_pep = maxn_vmods_per_pep, - maxn_sites_per_vmod = maxn_sites_per_vmod, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits) - - return(ans) + FUN <- if (type %in% c("amods- tmod- vnl- fnl-", "amods- tmod+ vnl- fnl-")) + "gen_ms2ions_base" + else if (type %in% c("amods- tmod- vnl- fnl+", "amods- tmod+ vnl- fnl+")) + "gen_ms2ions_a0_vnl0_fnl1" + else if (type %in% c("amods+ tmod- vnl- fnl-", "amods+ tmod+ vnl- fnl-")) + "gen_ms2ions_a1_vnl0_fnl0" + else if (type %in% c("amods+ tmod- vnl+ fnl-", "amods+ tmod+ vnl+ fnl-")) + "gen_ms2ions_a1_vnl1_fnl0" + else if (type %in% c("amods+ tmod- vnl- fnl+", "amods+ tmod+ vnl- fnl+")) + "gen_ms2ions_a1_vnl0_fnl1" + else 
{ + # "amods+ tmod- vnl+ fnl+" + message("Unknown modification type.") + return(NULL) } - - # (5, 6) "amods- tmod+ vnl- fnl+", "amods- tmod- vnl- fnl+" - # (mutual exclusive btw. (1, 2) and (5, 6) - # "ANY" fmod has neuloss -> 5, 6; - # "ALL" fmods have no neuloss -> 1, 2) - - if (type %in% c("amods- tmod- vnl- fnl+", "amods- tmod+ vnl- fnl+")) { - ntmod <- attr(aa_masses, "ntmod", exact = TRUE) - - ntmass <- if (length(ntmod)) - aa_masses[names(ntmod)] + 1.00727647 - else - aa_masses["N-term"] - 0.000549 - - ctmod <- attr(aa_masses, "ctmod", exact = TRUE) - - ctmass <- if (length(ctmod)) - aa_masses[names(ctmod)] + 2.01510147 - else - aa_masses["C-term"] + 2.01510147 - fmods_nl <- attr(aa_masses, "fmods_nl", exact = TRUE) - - ans <- gen_ms2ions_a0_vnl0_fnl1(aa_seq = aa_seq, ms1_mass = ms1_mass, - aa_masses = aa_masses, - ntmod = ntmod, ctmod = ctmod, - ntmass = ntmass, ctmass = ctmass, - fmods_nl = fmods_nl, - mod_indexes = mod_indexes, - type_ms2ions = type_ms2ions, - maxn_vmods_per_pep = maxn_vmods_per_pep, - maxn_sites_per_vmod = maxn_sites_per_vmod, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits) - - return(ans) - } - - ms1vmods <- make_ms1vmod_i(aa_masses = aa_masses, - maxn_vmods_per_pep = maxn_vmods_per_pep, + ntmod <- attr(aa_masses, "ntmod", exact = TRUE) + ctmod <- attr(aa_masses, "ctmod", exact = TRUE) + ntmass <- find_nterm_mass(aa_masses) + ctmass <- find_cterm_mass(aa_masses) + fmods_nl <- attr(aa_masses, "fmods_nl", exact = TRUE) + vmods_nl <- attr(aa_masses, "vmods_nl", exact = TRUE) + amods <- attr(aa_masses, "amods", exact = TRUE) + ms1vmods <- make_ms1vmod_i(aa_masses, maxn_vmods_per_pep = maxn_vmods_per_pep, maxn_sites_per_vmod = maxn_sites_per_vmod) - ms2vmods <- lapply(ms1vmods, make_ms2vmods) - # (7, 8) "amods+ tmod- vnl- fnl-", "amods+ tmod+ vnl- fnl-" - # (ALL amods are vnl-) - - if (type %in% c("amods+ tmod- vnl- fnl-", "amods+ tmod+ vnl- fnl-")) { - ntmod <- attr(aa_masses, "ntmod", exact = TRUE) - - 
ntmass <- if (length(ntmod)) - aa_masses[names(ntmod)] + 1.00727647 - else - aa_masses["N-term"] - 0.000549 - - ctmod <- attr(aa_masses, "ctmod", exact = TRUE) - - ctmass <- if (length(ctmod)) - aa_masses[names(ctmod)] + 2.01510147 - else - aa_masses["C-term"] + 2.01510147 - - amods <- attr(aa_masses, "amods", exact = TRUE) - - ans <- gen_ms2ions_a1_vnl0_fnl0(aa_seq = aa_seq, ms1_mass = ms1_mass, - aa_masses = aa_masses, - ms1vmods = ms1vmods, ms2vmods = ms2vmods, - ntmod = ntmod, ctmod = ctmod, - ntmass = ntmass, ctmass = ctmass, - amods = amods, - vmods_nl = NULL, fmods_nl = NULL, - mod_indexes = mod_indexes, - type_ms2ions = type_ms2ions, - maxn_vmods_per_pep = maxn_vmods_per_pep, - maxn_sites_per_vmod = maxn_sites_per_vmod, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits) - - return(ans) - } - - # (9, 10) "amods+ tmod- vnl+ fnl-", "amods+ tmod+ vnl+ fnl-" - # (ANY amod is vnl+) - - if (type %in% c("amods+ tmod- vnl+ fnl-", - "amods+ tmod+ vnl+ fnl-")) { - - ntmod <- attr(aa_masses, "ntmod", exact = TRUE) - - ntmass <- if (length(ntmod)) - aa_masses[names(ntmod)] + 1.00727647 - else - aa_masses["N-term"] - 0.000549 - - ctmod <- attr(aa_masses, "ctmod", exact = TRUE) - - ctmass <- if (length(ctmod)) - aa_masses[names(ctmod)] + 2.01510147 - else - aa_masses["C-term"] + 2.01510147 - - amods <- attr(aa_masses, "amods", exact = TRUE) - vmods_nl <- attr(aa_masses, "vmods_nl", exact = TRUE) - - ans <- gen_ms2ions_a1_vnl1_fnl0(aa_seq = aa_seq, ms1_mass = ms1_mass, - aa_masses = aa_masses, - ms1vmods = ms1vmods, ms2vmods = ms2vmods, - ntmod = ntmod, ctmod = ctmod, - ntmass = ntmass, ctmass = ctmass, - amods = amods, vmods_nl = vmods_nl, - mod_indexes = mod_indexes, - type_ms2ions = type_ms2ions, - maxn_vmods_per_pep = maxn_vmods_per_pep, - maxn_sites_per_vmod = maxn_sites_per_vmod, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits) - - return(ans) - } - - # (11, 12) "amods+ tmod- vnl- fnl+", "amods+ 
tmod+ vnl- fnl+" - # (mutual exclusive btw. (11, 12) and (7, 8); - # logicial ANY versus ALL) - - if (type %in% c("amods+ tmod- vnl- fnl+", "amods+ tmod+ vnl- fnl+")) { - - ntmod <- attr(aa_masses, "ntmod", exact = TRUE) - - ntmass <- if (length(ntmod)) - aa_masses[names(ntmod)] + 1.00727647 - else - aa_masses["N-term"] - 0.000549 - - ctmod <- attr(aa_masses, "ctmod", exact = TRUE) - - ctmass <- if (length(ctmod)) - aa_masses[names(ctmod)] + 2.01510147 - else - aa_masses["C-term"] + 2.01510147 - - amods <- attr(aa_masses, "amods", exact = TRUE) - fmods_nl <- attr(aa_masses, "fmods_nl", exact = TRUE) - - ans <- gen_ms2ions_a1_vnl0_fnl1(aa_seq = aa_seq, ms1_mass = ms1_mass, - aa_masses = aa_masses, - ms1vmods = ms1vmods, ms2vmods = ms2vmods, - ntmod = ntmod, ctmod = ctmod, - ntmass = ntmass, ctmass = ctmass, - amods = amods, fmods_nl = fmods_nl, - mod_indexes = mod_indexes, - type_ms2ions = type_ms2ions, - maxn_vmods_per_pep = maxn_vmods_per_pep, - maxn_sites_per_vmod = maxn_sites_per_vmod, - maxn_vmods_sitescombi_per_pep = - maxn_vmods_sitescombi_per_pep, - digits = digits) - - return(ans) - } - - ans <- NULL + fmods_nl <- if (length(fmods_nl)) fmods_nl else NULL + vmods_nl <- if (length(vmods_nl)) vmods_nl else NULL + amods <- if (length(amods)) amods else NULL + ms1vmods <- if (length(ms1vmods)) ms1vmods else NULL + ms2vmods <- if (length(ms2vmods)) ms2vmods else NULL + + do.call(FUN, + list(aa_seq = aa_seq, + ms1_mass = ms1_mass, + aa_masses = aa_masses, + ms1vmods = ms1vmods, + ms2vmods = ms2vmods, + ntmod = ntmod, + ctmod = ctmod, + ntmass = ntmass, + ctmass = ctmass, + amods = amods, vmods_nl = vmods_nl, fmods_nl = fmods_nl, + mod_indexes = mod_indexes, + type_ms2ions = type_ms2ions, + maxn_vmods_per_pep = maxn_vmods_per_pep, + maxn_sites_per_vmod = maxn_sites_per_vmod, + maxn_fnl_per_seq = maxn_fnl_per_seq, + maxn_vnl_per_seq = maxn_vnl_per_seq, + maxn_vmods_sitescombi_per_pep = maxn_vmods_sitescombi_per_pep, + digits = digits)) } diff --git 
a/R/vmod_ms1_labels.R b/R/vmod_ms1_labels.R index 8093bed..edda89b 100644 --- a/R/vmod_ms1_labels.R +++ b/R/vmod_ms1_labels.R @@ -90,7 +90,7 @@ match_mvmods <- function (aas = NULL, ms1vmods = NULL, amods = NULL) rows[i] <- all(ps <= max_rs) } - rows <- which(rows) + rows <- .Internal(which(rows)) list(ms1 = ms1vmods[rows], inds = rows) } diff --git a/R/vmod_ms2_labels.R b/R/vmod_ms2_labels.R index 6757b12..4cd18f8 100644 --- a/R/vmod_ms2_labels.R +++ b/R/vmod_ms2_labels.R @@ -171,23 +171,40 @@ find_vmodscombi <- function (aas = NULL, ms2vmods = NULL, #' Note that M is a matrix other than lists of vectors, which allows the #' application of one copy of attributes to all rows. #' @param aas \code{aa_seq} split in a sequence of LETTERS. +#' @examples +#' \donttest{ +#' library(mzion) +#' +#' aa_seq <- "MHQGVMNVNMGQKMNS" +#' aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) +#' aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) +#' +#' m <- c("M", "M", "N", "N") +#' labs <- ps <- c(2, 2) +#' names(ps) <- c("M", "N") +#' names(labs) <- c("Oxidation (M)", "Deamidated (N)") +#' +#' M <- c("Oxidation (M)", "Oxidation (M)", "Deamidated (N)", "Deamidated (N)") +#' M <- matrix(M, ncol = 4) +#' attr(M, "ps") <- ps +#' attr(M, "resids") <- m +#' +#' mzion:::combi_namesiteU(M, aas) +#' } combi_namesiteU <- function (M, aas) { m <- attr(M, "resids") ps <- attr(M, "ps") + ss <- names(ps) - ans <- find_vmodposU(m, ps, aas) - combi <- ans$combi - vpos <- ans$vpos # ordinal column indexes of the output - - # replace the ordinal column indexes with aas indexes (faster than unlist) + combi <- find_vmodposU(m, ps, aas) # add a size limit? 
len_out <- nrow(combi) out <- rep(list(M[1, ]), len_out) cols <- seq_len(len_out) - - for (i in seq_along(vpos)) { # by residue + + for (i in seq_along(ps)) { # by residue ansi <- combi[[i]] - pi <- vpos[[i]] + pi <- .Internal(which(m == ss[i])) for (j in cols) names(out[[j]])[pi] <- ansi[[j]] # by combi @@ -207,24 +224,32 @@ combi_namesiteU <- function (M, aas) #' @param ps Named vector; counts for each site. Sites in names and counts in #' values. #' @param aas \code{aa_seq} split in a sequence of LETTERS. +#' @examples +#' \donttest{ +#' library(mzion) +#' +#' m <- c("M", "M", "N", "N") +#' ps <- c(2, 2) +#' names(ps) <- c("M", "N") +#' +#' aa_seq <- "MHQGVMNVNMGQKMNS" +#' aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) +#' aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) +#' ans <- mzion:::find_vmodposU(m, ps, aas) +#' } find_vmodposU <- function (vec, ps, aas) { - nres <- length(ps) - M <- vpos <- vector("list", nres) + X <- vector("list", length(ps)) - for (i in seq_len(nres)) { + for (i in seq_along(ps)) { resid <- names(ps)[i] # M - aapos <- which(aas == resid) # M:5, 9, 13; N: 6, 14 + aapos <- .Internal(which(aas == resid)) # M:5, 9, 13; N: 6, 14 ct <- ps[[i]] # M: 2; N: 1 - vpos[[i]] <- which(vec == resid) # M: 1, 2; N: 3 - M[[i]] <- if (ct == 1L) vec_to_list(aapos) else sim_combn(aapos, ct) + X[[i]] <- if (ct == 1L) vec_to_list(aapos) else sim_combn(aapos, ct) } - list( - combi = expand.grid(M, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE), - vpos = vpos - ) + expand.grid(X, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) } @@ -286,15 +311,15 @@ combi_namesiteM <- function (M, aas, nrows) #' @inheritParams find_vmodposU find_vmodposM <- function (Vec, vec, ps, aas) { - nres <- length(ps) - M <- vpos <- vector("list", nres) + # nr <- length(ps) + M <- P <- vector("list", length(ps)) - for (i in seq_len(nres)) { # by residues + for (i in seq_along(ps)) { # by residues resid <- names(ps)[i] # M - 
aapos <- which(aas == resid) # M:5, 9, 13; N: 6, 14 + aapos <- .Internal(which(aas == resid)) # M:5, 9, 13; N: 6, 14 ct <- ps[[i]] # M: 2; N: 1 - vpos[[i]] <- which(vec == resid) # M: 1, 2; N: 3 + P[[i]] <- .Internal(which(vec == resid)) # M: 1, 2; N: 3 M[[i]] <- if (ct == 1L) vec_to_list(aapos) else sim_combn(aapos, ct) } @@ -303,9 +328,9 @@ find_vmodposM <- function (Vec, vec, ps, aas) len_out <- nrow(ans) out <- rep(list(Vec), len_out) - for (i in seq_along(vpos)) { # by residue + for (i in seq_along(P)) { # by residue ansi <- ans[[i]] - pi <- vpos[[i]] + pi <- P[[i]] for (j in seq_len(len_out)) names(out[[j]])[pi] <- ansi[[j]] # by combi diff --git a/man/calc_ms2ions.Rd b/man/calc_ms2ions.Rd index 1704c4d..ef6540e 100644 --- a/man/calc_ms2ions.Rd +++ b/man/calc_ms2ions.Rd @@ -13,6 +13,8 @@ calc_ms2ions( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, digits = 4L ) } @@ -42,6 +44,12 @@ variable modifications per site in a per peptide sequence.} \item{maxn_vmods_sitescombi_per_pep}{Integer; the maximum number of combinatorial variable modifications per peptide sequence.} +\item{maxn_fnl_per_seq}{A non-negative integer; the maximum number of +permutative neutral losses per peptide sequence for fixed modifications.} + +\item{maxn_vnl_per_seq}{A non-negative integer; the maximum number of +permutative neutral losses per peptide sequence for variable modifications.} + \item{digits}{Integer; the number of decimal places to be used.} } \description{ diff --git a/man/calc_ms2ionseries.Rd b/man/calc_ms2ionseries.Rd index b64b4a0..474b767 100644 --- a/man/calc_ms2ionseries.Rd +++ b/man/calc_ms2ionseries.Rd @@ -14,6 +14,8 @@ calc_ms2ionseries( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 32L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, digits = 4L ) } @@ -45,6 +47,12 @@ variable modifications per site in a per peptide sequence.} 
\item{maxn_vmods_sitescombi_per_pep}{Integer; the maximum number of combinatorial variable modifications per peptide sequence.} +\item{maxn_fnl_per_seq}{A non-negative integer; the maximum number of +permutative neutral losses per peptide sequence for fixed modifications.} + +\item{maxn_vnl_per_seq}{A non-negative integer; the maximum number of +permutative neutral losses per peptide sequence for variable modifications.} + \item{digits}{Integer; the number of decimal places to be used.} } \description{ diff --git a/man/calc_pepscores.Rd b/man/calc_pepscores.Rd index 86a4c32..c709507 100644 --- a/man/calc_pepscores.Rd +++ b/man/calc_pepscores.Rd @@ -195,6 +195,11 @@ sets of combinatorial variable modifications. The default is 512.} \item{add_ms2ints}{Logical; if TRUE, adds the sequence of experimental MS2 intensity values (\code{pep_ms2_ints}).} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} + \item{digits}{A non-negative integer; the number of decimal places to be used. The default is 4.} } diff --git a/man/calcpepsc.Rd b/man/calcpepsc.Rd index 4367f7e..6549e4a 100644 --- a/man/calcpepsc.Rd +++ b/man/calcpepsc.Rd @@ -112,10 +112,13 @@ plexes. For example, apply \code{tmt16} for \code{tmt12} provided a set of \item{ppm_reporters}{A positive integer; the mass tolerance of MS2 reporter ions. The default is 10.} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} + \item{digits}{A non-negative integer; the number of decimal places to be used. 
The default is 4.} - -\item{n_cores}{The number of CPU cores} } \description{ Helper of \link{calc_pepscores}. diff --git a/man/calib_mgf.Rd b/man/calib_mgf.Rd index 49adbae..5400e17 100644 --- a/man/calib_mgf.Rd +++ b/man/calib_mgf.Rd @@ -17,12 +17,12 @@ calib_mgf( maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 20L, - ppm_ms1calib = 20L, + reframe_mgfs = TRUE, ppm_ms2 = 20L, min_mass = 200L, max_mass = 4500L, min_ms2mass = 115L, - quant = c("none", "tmt6", "tmt10", "tmt11", "tmt16", "tmt18"), + quant = "none", ppm_reporters = 10L, index_mgf_ms2 = FALSE, by_modules = TRUE, @@ -53,8 +53,13 @@ calib_mgf( With MSConvert, the default \code{titleMaker} is required for correct parsing (don't think it can be altered by users, but just in case).} +\item{aa_masses_all}{List(1); The first list of all amino-acid look-ups.} + \item{out_path}{A file path of outputs.} +\item{mod_indexes}{Integer; the indexes of fixed and/or variable +modifications} + \item{type_ms2ions}{Character; the type of \href{http://www.matrixscience.com/help/fragmentation_help.html}{ MS2 ions}. Values are in one of "by", "ax" and "cz". The default is "by" for b- @@ -91,9 +96,7 @@ not part of the threshold.} \item{ppm_ms1}{A positive integer; the mass tolerance of MS1 species. The default is 20.} -\item{ppm_ms1calib}{A positive integer; the mass tolerance of MS1 species for -precursor mass calibration. The default is 10. The argument has no effect -at \code{calib_ms1mass = FALSE}.} +\item{reframe_mgfs}{Logical; if TRUE, recalculates the frame indexes of MGFs} \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} @@ -134,6 +137,11 @@ ions. The default is 10.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. 
If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} + \item{fasta}{Character string(s) to the name(s) of fasta file(s) with prepended directory path. The experimenter needs to supply the files.} diff --git a/man/calib_ms1.Rd b/man/calib_ms1.Rd index 94fcd01..d36bae1 100644 --- a/man/calib_ms1.Rd +++ b/man/calib_ms1.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/msmsmatches2.R \name{calib_ms1} \alias{calib_ms1} -\title{Calibration precursor masses by individual RAW_Files.} +\title{Calibrates precursor masses (by individual RAW_Files)} \usage{ calib_ms1( - file_mgf, + filename, df = NULL, mgf_path = NULL, out_path = NULL, @@ -17,7 +17,7 @@ calib_ms1( ) } \arguments{ -\item{file_mgf}{An MGF file name} +\item{filename}{An MGF file name} \item{df}{A data frame of \code{ion_matches_1.rds}} @@ -49,5 +49,5 @@ interrogation.} used. The default is 4.} } \description{ -Calibration precursor masses by individual RAW_Files. +Calibrates precursor masses (by individual RAW_Files) } diff --git a/man/check_ms1calib.Rd b/man/check_ms1calib.Rd deleted file mode 100644 index 84928d0..0000000 --- a/man/check_ms1calib.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_engine.R -\name{check_ms1calib} -\alias{check_ms1calib} -\title{Checks the status of a prior execution of precursor mass calibration.} -\usage{ -check_ms1calib(out_path = NULL, calib_ms1mass = FALSE) -} -\arguments{ -\item{out_path}{A file path of outputs.} - -\item{calib_ms1mass}{Temporarily diabled. Logical; if TRUE, calibrates -precursor masses.} -} -\value{ -TRUE if without precursor mass calibration. -} -\description{ -Checks the status of a prior execution of precursor mass calibration. 
-} diff --git a/man/combi_namesiteU.Rd b/man/combi_namesiteU.Rd index 04e7861..767ec7b 100644 --- a/man/combi_namesiteU.Rd +++ b/man/combi_namesiteU.Rd @@ -20,3 +20,24 @@ combi_namesiteU(M, aas) One-to-one correspondence between Names and Sites. Finds the positions of residues (sites) from a given amino acid sequence (aas). } +\examples{ +\donttest{ +library(mzion) + +aa_seq <- "MHQGVMNVNMGQKMNS" +aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) +aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) + +m <- c("M", "M", "N", "N") +labs <- ps <- c(2, 2) +names(ps) <- c("M", "N") +names(labs) <- c("Oxidation (M)", "Deamidated (N)") + +M <- c("Oxidation (M)", "Oxidation (M)", "Deamidated (N)", "Deamidated (N)") +M <- matrix(M, ncol = 4) +attr(M, "ps") <- ps +attr(M, "resids") <- m + +mzion:::combi_namesiteU(M, aas) +} +} diff --git a/man/expand_grid.Rd b/man/expand_grid.Rd new file mode 100644 index 0000000..7f9cd3e --- /dev/null +++ b/man/expand_grid.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_engine.R +\name{expand_grid} +\alias{expand_grid} +\title{Modified from expand.grid} +\usage{ +expand_grid(..., nmax = 3L) +} +\arguments{ +\item{...}{Lists of data.} + +\item{nmax}{The maximum number of combinations allowed.} +} +\description{ +Net yet used. 
+} diff --git a/man/expand_grid_rows.Rd b/man/expand_grid_rows.Rd index b155278..690d36a 100644 --- a/man/expand_grid_rows.Rd +++ b/man/expand_grid_rows.Rd @@ -4,7 +4,7 @@ \alias{expand_grid_rows} \title{Expands grids.} \usage{ -expand_grid_rows(..., nmax = 8L, use.names = TRUE) +expand_grid_rows(..., nmax = 3L, use.names = TRUE) } \arguments{ \item{...}{Lists of data.} diff --git a/man/find_psm_rows2.Rd b/man/find_psm_rowsC.Rd similarity index 83% rename from man/find_psm_rows2.Rd rename to man/find_psm_rowsC.Rd index 680c560..c843d50 100644 --- a/man/find_psm_rows2.Rd +++ b/man/find_psm_rowsC.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapMS2ions.R -\name{find_psm_rows2} -\alias{find_psm_rows2} +\name{find_psm_rowsC} +\alias{find_psm_rowsC} \title{Extracts the first row of matched PSMs from psmC.} \usage{ -find_psm_rows2(file_t0, scan, raw_file, rank = 1L, is_decoy = FALSE) +find_psm_rowsC(file_t0, scan, raw_file, rank = 1L, is_decoy = FALSE) } \arguments{ \item{file_t0}{The filename of psmC results.} diff --git a/man/find_psm_rows1.Rd b/man/find_psm_rowsQ.Rd similarity index 92% rename from man/find_psm_rows1.Rd rename to man/find_psm_rowsQ.Rd index 9d984cb..c03ad96 100644 --- a/man/find_psm_rows1.Rd +++ b/man/find_psm_rowsQ.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mapMS2ions.R -\name{find_psm_rows1} -\alias{find_psm_rows1} +\name{find_psm_rowsQ} +\alias{find_psm_rowsQ} \title{Extracts the first row of matched PSMs from tiers 1-3.} \usage{ -find_psm_rows1( +find_psm_rowsQ( file_t1, file_t2, file_t3, diff --git a/man/find_vmodposU.Rd b/man/find_vmodposU.Rd index 65e766c..c2adfa8 100644 --- a/man/find_vmodposU.Rd +++ b/man/find_vmodposU.Rd @@ -20,3 +20,17 @@ One-to-one correspondence between Names and Sites. \details{ Custom functions: vec_to_list. 
} +\examples{ +\donttest{ +library(mzion) + +m <- c("M", "M", "N", "N") +ps <- c(2, 2) +names(ps) <- c("M", "N") + +aa_seq <- "MHQGVMNVNMGQKMNS" +aas <- .Internal(strsplit(aa_seq, "", fixed = TRUE, perl = FALSE, useBytes = FALSE)) +aas <- .Internal(unlist(aas, recursive = FALSE, use.names = FALSE)) +ans <- mzion:::find_vmodposU(m, ps, aas) +} +} diff --git a/man/gen_ms2ions_base.Rd b/man/gen_ms2ions_base.Rd index 962fe10..d1893a0 100644 --- a/man/gen_ms2ions_base.Rd +++ b/man/gen_ms2ions_base.Rd @@ -25,8 +25,8 @@ gen_ms2ions_base( type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, - maxn_fnl_per_seq = 64L, - maxn_vnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, digits = 4L ) @@ -48,8 +48,8 @@ gen_ms2ions_a0_vnl0_fnl1( type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, - maxn_fnl_per_seq = 8L, - maxn_vnl_per_seq = 8L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, digits = 4L ) @@ -71,8 +71,8 @@ gen_ms2ions_a1_vnl0_fnl0( type_ms2ions = "by", maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, - maxn_fnl_per_seq = 64L, - maxn_vnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, maxn_vmods_sitescombi_per_pep = 64L, digits = 4L ) @@ -95,8 +95,8 @@ gen_ms2ions_a1_vnl0_fnl1( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, - maxn_fnl_per_seq = 64L, - maxn_vnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, digits = 4L ) @@ -118,8 +118,8 @@ gen_ms2ions_a1_vnl1_fnl0( maxn_vmods_per_pep = 5L, maxn_sites_per_vmod = 3L, maxn_vmods_sitescombi_per_pep = 64L, - maxn_fnl_per_seq = 64L, - maxn_vnl_per_seq = 64L, + maxn_fnl_per_seq = 3L, + maxn_vnl_per_seq = 3L, digits = 4L ) } @@ -208,8 +208,8 @@ fixedmods <- c("TMT6plex (K)", "Carbamidomethyl (C)") varmods <- c("TMT6plex (N-term)", "Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)", "Gln->pyro-Glu (N-term = Q)") 
-mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -243,8 +243,8 @@ out <- mzion:::gen_ms2ions_base(aa_seq = aa_seq, ms1_mass = ms1_mass, fixedmods <- c("TMT6plex (N-term)", "TMT6plex (K)", "Carbamidomethyl (C)") varmods <- c("Oxidation (M)", "Deamidated (N)") -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -276,14 +276,13 @@ out <- mzion:::gen_ms2ions_base(aa_seq = aa_seq, ms1_mass = ms1_mass, } \donttest{ library(mzion) -library(magrittr) # (5) "amods- tmod+ vnl- fnl+" fixedmods <- c("TMT6plex (N-term)", "Oxidation (M)", "dHex (S)") varmods <- c("Acetyl (Protein N-term)") -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -312,24 +311,23 @@ aa_seq <- "MHQGVMNVGMGQKMNS" # variable `TMT6plex (N-term)` + `fixed Oxidation (M)` # (additive varmod on top of fixedmod allowed) -out <- mzion:::gen_ms2ions_a0_vnl0_fnl1(aa_seq = aa_seq, ms1_mass = NULL, - aa_masses = aa_masses, ntmod = NULL, ctmod = NULL, - ntmass = ntmass, ctmass = ctmass, - amods = NULL, vmods_nl = NULL, fmods_nl = fmods_nl, - mod_indexes = mod_indexes) - +out <- mzion:::gen_ms2ions_a0_vnl0_fnl1( + aa_seq = aa_seq, ms1_mass = NULL, + aa_masses = aa_masses, ntmod = NULL, ctmod = NULL, + ntmass = ntmass, ctmass = ctmass, + amods = NULL, vmods_nl = NULL, fmods_nl = fmods_nl, + mod_indexes = mod_indexes) } \donttest{ library(mzion) -library(magrittr) # (8a) "amods+ tmod+ vnl- fnl-" fixedmods <- c("TMT6plex (K)") varmods <- c("Deamidated (N)", "Carbamidomethyl (S)", 
"Acetyl (Protein N-term)") -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -387,8 +385,8 @@ varmods <- c("Acetyl (Protein N-term)", "Oxidation (M)", "Deamidated (N)", fixedmods <- sort(fixedmods) varmods <- sort(varmods) -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -439,14 +437,13 @@ out <- mzion:::gen_ms2ions_a1_vnl0_fnl0(aa_seq = aa_seq, ms1_mass = ms1_mass, } \donttest{ library(mzion) -library(magrittr) # (12) "amods+ tmod+ vnl- fnl+" fixedmods <- c("TMT6plex (K)", "Oxidation (M)", "dHex (S)") varmods <- c("Deamidated (N)", "Acetyl (Protein N-term)") -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -504,15 +501,14 @@ ms1_mass <- ms1_masses$mass[[2]][2] # 1367.6996 } \donttest{ library(mzion) -library(magrittr) # (10) "amods+ tmod+ vnl+ fnl-" fixedmods <- c("TMT6plex (K)") varmods <- c("dHex (S)", "Oxidation (M)", "Deamidated (N)", "Acetyl (Protein N-term)") -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) aa_masses_all <- calc_aamasses(fixedmods, varmods) @@ -581,8 +577,8 @@ fixedmods <- c("TMT6plex (N-term)", "TMT6plex (K)", varmods <- c("Acetyl (Protein N-term)", "Oxidation (M)", "Carbamidomethyl (M)") -mod_indexes <- seq_along(c(fixedmods, varmods)) \%>\% - as.hexmode() \%>\% +mod_indexes <- seq_along(c(fixedmods, varmods)) |> + as.hexmode() |> `names<-`(c(fixedmods, varmods)) 
aa_masses_all <- calc_aamasses(fixedmods, varmods) diff --git a/man/hadd_primatches.Rd b/man/hadd_primatches.Rd index d1b487d..f63efa3 100644 --- a/man/hadd_primatches.Rd +++ b/man/hadd_primatches.Rd @@ -10,6 +10,7 @@ hadd_primatches( add_ms2theos2 = FALSE, add_ms2moverzs = FALSE, add_ms2ints = FALSE, + by_modules = TRUE, index_mgf_ms2 = FALSE ) } @@ -38,6 +39,11 @@ hadd_primatches( \item{add_ms2ints}{Logical; if TRUE, adds the sequence of experimental MS2 intensity values (\code{pep_ms2_ints}).} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} + \item{index_mgf_ms2}{A low-priority feature. Logical; if TRUE, converts upfrontly MS2 m-over-z values from numeric to integers as opposed to \emph{in-situ} conversion during ion matches. The default is FALSE. The diff --git a/man/hms2match.Rd b/man/hms2match.Rd index 710f556..818d8fc 100644 --- a/man/hms2match.Rd +++ b/man/hms2match.Rd @@ -111,9 +111,10 @@ interrogation. The default is 110.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} -\item{by_modules}{Logical; if TRUE, results are saved with one mgf to one -theo module. At FALSE, results are saved with one mgf paired to all theo -modules} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. 
The later would +probably need more than 32G RAM if the number of modules is over 96.} \item{df0}{An output template with zero rows} diff --git a/man/matchMS.Rd b/man/matchMS.Rd index a3bf4cf..7321132 100644 --- a/man/matchMS.Rd +++ b/man/matchMS.Rd @@ -79,7 +79,7 @@ matchMS( min_ret_time = 0, max_ret_time = Inf, calib_ms1mass = FALSE, - ppm_ms1calib = 10L, + ppm_ms1calib = 20L, add_ms2theos = FALSE, add_ms2theos2 = FALSE, add_ms2moverzs = FALSE, @@ -93,6 +93,7 @@ matchMS( svm_costs = c(0.1, 0.3, 1, 3, 10), svm_def_cost = 1, svm_iters = 10L, + by_modules = TRUE, digits = 4L, ... ) @@ -500,12 +501,11 @@ seconds for considerations. The default is 0.} \item{max_ret_time}{A non-negative numeric; the maximum retention time in seconds for considerations. The default is \code{Inf}.} -\item{calib_ms1mass}{Temporarily diabled. Logical; if TRUE, calibrates -precursor masses.} +\item{calib_ms1mass}{Logical; if TRUE, calibrates precursor masses.} \item{ppm_ms1calib}{A positive integer; the mass tolerance of MS1 species for -precursor mass calibration. The default is 10. The argument has no effect -at \code{calib_ms1mass = FALSE}.} +precursor mass calibration. The argument has no effect at +\code{calib_ms1mass = FALSE}.} \item{add_ms2theos}{Logical. If true, adds the sequence of primary theoretical MS2 m/z values (\code{pep_ms2_theos}). The sequence order at a @@ -549,6 +549,11 @@ validation.} \item{svm_iters}{The number of iteration in SVM learning.} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} + \item{digits}{A non-negative integer; the number of decimal places to be used. 
The default is 4.} diff --git a/man/ms2match.Rd b/man/ms2match.Rd index 8dd732a..6041cd7 100644 --- a/man/ms2match.Rd +++ b/man/ms2match.Rd @@ -17,15 +17,14 @@ ms2match( maxn_vmods_sitescombi_per_pep = 64L, minn_ms2 = 6L, ppm_ms1 = 20L, - ppm_ms1calib = 10L, ppm_ms2 = 20L, min_mass = 200L, max_mass = 4500L, min_ms2mass = 115L, quant = "none", ppm_reporters = 10L, - calib_ms1mass = FALSE, by_modules = TRUE, + reframe_mgfs = FALSE, fasta, acc_type, acc_pattern, @@ -39,6 +38,8 @@ ms2match( max_len, max_miss, index_mgf_ms2 = FALSE, + first_search = FALSE, + .savecall = TRUE, digits = 4L ) } @@ -97,10 +98,6 @@ not part of the threshold.} \item{ppm_ms1}{A positive integer; the mass tolerance of MS1 species. The default is 20.} -\item{ppm_ms1calib}{A positive integer; the mass tolerance of MS1 species for -precursor mass calibration. The default is 10. The argument has no effect -at \code{calib_ms1mass = FALSE}.} - \item{ppm_ms2}{A positive integer; the mass tolerance of MS2 species. The default is 20.} @@ -126,8 +123,12 @@ plexes. For example, apply \code{tmt16} for \code{tmt12} provided a set of \item{ppm_reporters}{A positive integer; the mass tolerance of MS2 reporter ions. The default is 10.} -\item{calib_ms1mass}{Temporarily diabled. Logical; if TRUE, calibrates -precursor masses.} +\item{by_modules}{Logical. Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} + +\item{reframe_mgfs}{Logical; if TRUE, recalculates the frame indexes of MGFs.} \item{fasta}{Character string(s) to the name(s) of fasta file(s) with prepended directory path. The experimenter needs to supply the files.} @@ -192,6 +193,11 @@ per peptide sequence for considerations. 
The default is 2.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} +\item{first_search}{Logical; is the first search (for MGF mass calibration) +or not.} + +\item{.savecall}{Logical; if TRUE, saves the current call.} + \item{digits}{A non-negative integer; the number of decimal places to be used. The default is 4.} } diff --git a/man/order_fracs.Rd b/man/order_fracs.Rd index d5a5179..10a4dc4 100644 --- a/man/order_fracs.Rd +++ b/man/order_fracs.Rd @@ -4,12 +4,14 @@ \alias{order_fracs} \title{Order fractions} \usage{ -order_fracs(type = "list_table", tempdir) +order_fracs(type = "list_table", tempdir, by_modules = TRUE) } \arguments{ -\item{type}{The type of files} +\item{type}{The type of files.} -\item{tempdir}{A temporary directory containing the files} +\item{tempdir}{A temporary directory containing the files.} + +\item{by_modules}{Logical; if TRUE, performs searches by modules.} } \description{ Order fractions diff --git a/man/pair_mgftheo.Rd b/man/pair_mgftheo.Rd index 7655d06..77a4dc9 100644 --- a/man/pair_mgftheo.Rd +++ b/man/pair_mgftheo.Rd @@ -4,7 +4,16 @@ \alias{pair_mgftheo} \title{Pairs MGF queries to theoretical MS1 masses and peptide sequences.} \usage{ -pair_mgftheo(mgf_path, n_modules, .path_bin, by_modules = TRUE) +pair_mgftheo( + mgf_path, + n_modules, + .path_bin, + by_modules = TRUE, + reframe_mgfs = FALSE, + min_mass = 200L, + ppm_ms1_bin = 10L, + first_search = FALSE +) } \arguments{ \item{mgf_path}{The path to MGF files} @@ -16,6 +25,17 @@ pair_mgftheo(mgf_path, n_modules, .path_bin, by_modules = TRUE) \item{by_modules}{Logical; if TRUE, results are saved with one mgf to one theo module. At FALSE, results are saved with one mgf paired to all theo modules} + +\item{reframe_mgfs}{Logical; if TRUE, recalculates the frame indexes of MGFs.} + +\item{min_mass}{A positive integer; the minimum precursor mass for +interrogation. 
The default is an arbitrarily low value (the primary guard +against low molecular-weight precursors is \code{min_len}).} + +\item{ppm_ms1_bin}{The tolerance in precursor mass error after mass binning.} + +\item{first_search}{Logical; is the first search (for MGF mass calibration) +or not.} } \description{ Pairs MGF queries to theoretical MS1 masses and peptide sequences. diff --git a/man/save_ms1calib.Rd b/man/save_ms1calib.Rd deleted file mode 100644 index ac7eb8e..0000000 --- a/man/save_ms1calib.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_engine.R -\name{save_ms1calib} -\alias{save_ms1calib} -\title{Saves the \code{ppm_ms1} before and after calibration.} -\usage{ -save_ms1calib(ppm_ms1, ppm_ms1calib, mgf_path) -} -\arguments{ -\item{ppm_ms1}{A positive integer; the mass tolerance of MS1 species. The -default is 20.} - -\item{ppm_ms1calib}{The mass error after calibration in ppm.} - -\item{mgf_path}{A file path to a list of MGF files. The experimenter needs to - supply the files. - - The supported MGFs are in the formats of (1) MSConvert against \code{.raw} - from Thermo's Orbitrap or \code{.d} from Bruker's timsTOF Pro, (2) Thermo's - Proteome Discoverer or (3) Bruker's DataAnalysis. - - With MSConvert, the default \code{titleMaker} is required for correct - parsing (don't think it can be altered by users, but just in case).} -} -\description{ -Saves the \code{ppm_ms1} before and after calibration. -} diff --git a/man/search_mgf.Rd b/man/search_mgf.Rd index 25b20b1..ab4854a 100644 --- a/man/search_mgf.Rd +++ b/man/search_mgf.Rd @@ -59,9 +59,10 @@ interrogation. The default is 110.} the fields of \code{pep_ms2_deltas, pep_ms2_deltas2, pep_ms2_deltas_mean, pep_ms2_deltas_sd} are nullified in the outputs.} -\item{by_modules}{Logical; if TRUE, results are saved with one mgf to one -theo module. At FALSE, results are saved with one mgf paired to all theo -modules} +\item{by_modules}{Logical. 
Experimenting. At the TRUE default, searches MS +data by individual modules of combinatorial fixed and variable +modifications. If FALSE, search all modules together. The later would +probably need more than 32G RAM if the number of modules is over 96.} } \description{ Matches an MGF query