Skip to content

Commit

Permalink
2024-04-02
Browse files Browse the repository at this point in the history
Bugs:
- Moved the interactive question of Thermo's license agreement from parallel processes to upstream.

Updates:
- Provided the C# source codes that wraps Thermo's MSFileReader.
  • Loading branch information
qzhang503 committed Apr 2, 2024
1 parent 01a9817 commit 8e3ff2f
Show file tree
Hide file tree
Showing 7 changed files with 276 additions and 39 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mzion
Type: Package
Title: Proteomics Database Searches of Mass-spectrometrirc Data.
Version: 1.3.9
Version: 1.3.9.1
Authors@R:
person(given = "Qiang",
family = "Zhang",
Expand Down
19 changes: 13 additions & 6 deletions R/msfilereader.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
#' @param filelist A list of RAW MS files.
readRAW <- function (mgf_path = NULL, filelist = NULL)
{
sys_path <- system.file("extdata", package = "mzion")
acceptMSFileReaderLicense(sys_path)

temp_dir <- create_dir(file.path(mgf_path, "temp_dir"))

local({
Expand Down Expand Up @@ -46,7 +49,8 @@ proc_raws <- function (raw_file, mgf_path, temp_dir)
filepeak <- exeReadRAW(raw_file, mgf_path)
pathfile <- file.path(mgf_path, filepeak)
lines <- readLines(pathfile)
# lines <- stringi::stri_read_lines(pathfile) # memory allocation or access error
# DON'T; see the help document; memory allocation or access error
# lines <- stringi::stri_read_lines(pathfile)

idx_scans <- .Internal(which(stringi::stri_cmp_eq(lines, "SCAN")))
scan_nums <- lines[idx_scans + 1L]
Expand Down Expand Up @@ -75,16 +79,20 @@ proc_raws <- function (raw_file, mgf_path, temp_dir)
iso_lwrs <- iso_ctrs - half_widths
iso_uprs <- iso_ctrs + half_widths

len <- length(msx_moverzs)
na_ints <- rep_len(NA_integer_, len)
na_reals <- rep_len(NA_real_, len)

out <- list(
msx_moverzs = msx_moverzs,
msx_ints = msx_ints,
msx_ns = msx_ns,
ms1_moverzs = NA_real_,
ms1_charges = NA_integer_,
ms1_ints = NA_real_,
ms1_moverzs = na_reals,
ms1_charges = na_ints,
ms1_ints = na_reals,

scan_title = scan_titles,
raw_file = raw_file, # single entry
raw_file = raw_file, # scalar
ms_level = ms_levels,
ret_time = ret_times,
scan_num = as.integer(scan_nums),
Expand Down Expand Up @@ -115,7 +123,6 @@ proc_raws <- function (raw_file, mgf_path, temp_dir)
exeReadRAW <- function(raw_file, mgf_path)
{
sys_path <- system.file("extdata", package = "mzion")
acceptMSFileReaderLicense(sys_path)
exe <- file.path(sys_path, name_exe = "ReadRAW.exe")
mono <- if (Sys.info()['sysname'] %in% c("Darwin", "Linux")) TRUE else FALSE

Expand Down
45 changes: 22 additions & 23 deletions R/mzml.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,36 +62,25 @@ readmzML <- function (filelist = NULL, mgf_path = NULL, data_type = "mzml",
# - find_ms1stat
#

if (data_type == "raw")
message("Processing RAW files")
else if (data_type == "mzml")
message("Processing mzML files.")

temp_dir <- create_dir(file.path(mgf_path, "temp_dir"))

if (data_type == "raw") {
message("Processing RAW files")
peakfiles <- readRAW(mgf_path = mgf_path, filelist = filelist)
}
else if (data_type == "mzml") {
message("Processing mzML files.")
peakfiles <- hloadMZML(filelist, mgf_path, temp_dir)
gc() # free up xml pointers
}
else {
is_dia <- TRUE
peakfiles <- "23aug2017_hela_serum_timecourse_wide_1a.raw.rds"

is_dia <- FALSE
peakfiles <- "20230108_AST_Neo1_DDA_UHG_HeLa_200ng_2th2p5ms_Cycle_01_20230808143253.raw.rds"

is_dia <- FALSE
peakfiles <- "01CPTAC3_Benchmarking_W_BI_20170508_BL_f02.raw.rds"
mzml_type <- "pwiz_3d"

is_dia <- FALSE
peakfiles <- "CPTAC_CCRCC_W_JHU_20190112_LUMOS_C3N-01261_T.raw.rds"

# peakfiles <- qs::qread("~/peakfiles_bi_g1.rds")
peakfiles <- qs::qread(file.path(temp_dir, "peaklists.rds"))
is_dia <- FALSE
peakfiles <- "01CPTAC3_Benchmarking_W_BI_20170508_BL_f02.raw.rds"
mzml_type <- "raw"
}

peakfile1 <- peakfiles[[1]]
Expand Down Expand Up @@ -262,6 +251,7 @@ readmzML <- function (filelist = NULL, mgf_path = NULL, data_type = "mzml",
MoreArgs = list(
mgf_path = mgf_path,
temp_dir = temp_dir,
mzml_type = mzml_type,
ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2,
maxn_mdda_precurs = maxn_mdda_precurs,
topn_ms2ions = topn_ms2ions,
Expand Down Expand Up @@ -295,6 +285,7 @@ readmzML <- function (filelist = NULL, mgf_path = NULL, data_type = "mzml",
MoreArgs = list(
mgf_path = mgf_path,
temp_dir = temp_dir,
mzml_type = mzml_type,
ppm_ms1 = ppm_ms1, ppm_ms2 = ppm_ms2,
maxn_mdda_precurs = maxn_mdda_precurs,
topn_ms2ions = topn_ms2ions,
Expand Down Expand Up @@ -1041,15 +1032,15 @@ extrDDA <- function (spec = NULL, raw_file = NULL, temp_dir = NULL,


#' Helper of \link{deisoDDA}.
#'
#'
#' @param raw_id A raw file id.
#' @param n_para The allowance of parallel processing.
#' @param n_para The allowance of parallel processing.
#' @param mgf_cutmzs The cut points in peak lists.
#' @param mgf_cutpercs The percentage of cuts.
#' @inheritParams deisoDDA
#' @inheritParams matchMS
hdeisoDDA <- function (filename, raw_id = 1L, mgf_path = NULL, temp_dir = NULL,
ppm_ms1 = 10L, ppm_ms2 = 10L,
mzml_type = "raw", ppm_ms1 = 10L, ppm_ms2 = 10L,
maxn_mdda_precurs = 5L,
topn_ms2ions = 150L, n_mdda_flanks = 6L, n_dia_scans = 4L,
min_mass = 200L, max_mass = 4500L,
Expand All @@ -1069,6 +1060,7 @@ hdeisoDDA <- function (filename, raw_id = 1L, mgf_path = NULL, temp_dir = NULL,
df <- deisoDDA(
filename,
temp_dir = temp_dir,
mzml_type = mzml_type,
ppm_ms1 = ppm_ms1,
ppm_ms2 = ppm_ms2,
maxn_mdda_precurs = maxn_mdda_precurs,
Expand Down Expand Up @@ -1127,9 +1119,11 @@ hdeisoDDA <- function (filename, raw_id = 1L, mgf_path = NULL, temp_dir = NULL,
#'
#' @param filename A peaklist filename.
#' @param temp_dir A temp_dir to the filename.
#' @param mzml_type The type of peak lists. To trigger MS1 deisotoping at
#' \code{mzml_type == "raw"} (MS1 X, y and Z are NA with Mzion).
#' @param n_para The allowance of parallel processing.
#' @inheritParams matchMS
deisoDDA <- function (filename = NULL, temp_dir = NULL,
deisoDDA <- function (filename = NULL, temp_dir = NULL, mzml_type = "raw",
ppm_ms1 = 10L, ppm_ms2 = 10L,
maxn_mdda_precurs = 5L, topn_ms2ions = 150L,
n_mdda_flanks = 6L, n_dia_scans = 4L,
Expand All @@ -1156,9 +1150,9 @@ deisoDDA <- function (filename = NULL, temp_dir = NULL,
msx_moverzs <- ans$msx_moverzs
msx_ints <- ans$msx_ints
msx_ns <- ans$msx_ns
ms1_moverzs <- ans$ms1_moverzs # by MSConvert
ms1_ints <- ans$ms1_ints # by MSConvert
ms1_charges <- ans$ms1_charges # by MSConvert
ms1_moverzs <- ans$ms1_moverzs # by MSConvert; Mzion: NA
ms1_ints <- ans$ms1_ints # by MSConvert; Mzion: NA
ms1_charges <- ans$ms1_charges # by MSConvert; Mzion: NA
scan_title <- ans$scan_title
raw_file <- ans$raw_file # scalar
ms_level <- ans$ms_level
Expand Down Expand Up @@ -1249,6 +1243,11 @@ deisoDDA <- function (filename = NULL, temp_dir = NULL,
}
}

# MS1 X, Y and Z are NA from Mzion::readRAW; need to trigger deisotoping
if (mzml_type == "raw" && maxn_mdda_precurs < 1L) {
maxn_mdda_precurs <- 1L
}

if (maxn_mdda_precurs) {
if (FALSE) { # slower but keep the codes for group split examples
n_cores <- n_para
Expand Down
213 changes: 213 additions & 0 deletions inst/extdata/ReadRAW/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using ThermoFisher.CommonCore.Data.Business;
using ThermoFisher.CommonCore.Data.Interfaces;
using ThermoFisher.CommonCore.RawFileReader;

namespace MYSPACE
{
public static class StringArrayExtensions
{
public static int NthIndexOf(this string[] array, string value, int n)
{
int count = 0;
for (int i = 0; i < array.Length; i++)
{
if (array[i] == value)
{
count++;
if (count == n)
{
return i;
}
}
}
return -1;
}
}

public static class IRawDataPlusExtension
{
public static void WriteSpectrum(this IRawDataPlus rawFile, string filename, List<int> L)
{
using (System.IO.StreamWriter file = new System.IO.StreamWriter(filename))
{
int IndexMonoMZ = 8, IndexIsoWidth = 12;
double isoMass = 0.0, isoWidth = 0.0;
string msOrder = "0";
bool isCentroidMS2 = true;

file.WriteLine("RAW\n{0}\n", Path.GetFileName(rawFile.FileName));

foreach (int scanNumber in L)
{
var scanStatistics = rawFile.GetScanStatsForScanNumber(scanNumber);
var scanEvent = rawFile.GetScanEventForScanNumber(scanNumber);

file.WriteLine("SCAN\n{0}", scanNumber);
file.WriteLine("TIME\n{0}", Math.Round(Convert.ToDouble(scanStatistics.StartTime), 6));

// MS order
msOrder = scanEvent.MSOrder.ToString().ToLower();

if (msOrder == "ms2")
{
msOrder = "2";
}
else if (msOrder == "ms")
{
msOrder = "1";
}
else
{
msOrder = "0";
}

file.WriteLine("MSORDER\n{0}", msOrder);

// Centroid MS2
isCentroidMS2 = scanStatistics.IsCentroidScan && msOrder == "2";

if (isCentroidMS2)
{
var reaction0 = scanEvent.GetReaction(0); // only available with centroid scans
isoMass = Math.Round(Convert.ToDouble(reaction0.PrecursorMass), 4);
isoWidth = Math.Round(Convert.ToDouble(reaction0.IsolationWidth), 4);
}

var centroidStream = rawFile.GetCentroidStream(scanNumber, false);
var NcentroidPeaks = centroidStream.Length;

if (NcentroidPeaks > 0)
{
var scanTrailer = rawFile.GetTrailerExtraInformation(scanNumber);
IndexMonoMZ = scanTrailer.Labels.NthIndexOf("Monoisotopic M/Z:", 1);
IndexIsoWidth = scanTrailer.Labels.NthIndexOf("MS2 Isolation Width:", 1);

double[] masses = new double[NcentroidPeaks];
double[] intensities = new double[NcentroidPeaks];

for (int i = 0; i < NcentroidPeaks; i++)
{
masses[i] = Math.Round(Convert.ToDouble(centroidStream.Masses[i]), 4);
intensities[i] = Math.Round(Convert.ToDouble(centroidStream.Intensities[i]), 1);
}

if (isCentroidMS2)
{
file.WriteLine("ISOMASS\n{0}", isoMass);
file.WriteLine("ISOWIDTH\n{0}", isoWidth);
}
else
{
if (IndexMonoMZ >= 0)
{
file.WriteLine("ISOMASS\n{0}", scanTrailer.Values[IndexMonoMZ]); // "-1.00" for DIA
}
else
{
file.WriteLine("ISOMASS\n0");
}

if (IndexIsoWidth >= 0)
{
file.WriteLine("ISOWIDTH\n{0}", scanTrailer.Values[IndexIsoWidth]); // "-1.00" for DIA
}
else
{
file.WriteLine("ISOWIDTH\n0");
}
}

file.WriteLine("NPEAKS\n{0}", NcentroidPeaks);
file.WriteLine("X\n{0}", string.Join(",", masses));
file.WriteLine("Y\n{0}", string.Join(",", intensities));
// file.WriteLine("LOWMASS\n{0}", masses[0]);
// file.WriteLine("HIGHMASS\n{0}", masses[masses.Length - 1]);
file.WriteLine("TITLE\nFile: {0}; scans: {1}\n", rawFile.FileName, scanNumber);
}
else
{
file.WriteLine("ISOMASS\n0");
file.WriteLine("ISOWIDTH\n0");
file.WriteLine("NPEAKS\n0");
file.WriteLine("X\n0");
file.WriteLine("Y\n0");
// file.WriteLine("LOWMASS\n0");
// file.WriteLine("HIGHMASS\n0");
file.WriteLine("TITLE\nFile: {0}; scans: {1}\n", rawFile.FileName, scanNumber);
}

}
}

return;
}

}
}


namespace MyApp
{
using MYSPACE;

internal static class Program
{
private static void Main(string[] args)
{

string filename = string.Empty;

filename = args[0];

if (string.IsNullOrEmpty(filename))
{
Console.WriteLine("No RAW file specified!");
return;
}

try
{
var rawFile = RawFileReaderAdapter.FileFactory(filename);

if (!rawFile.IsOpen || rawFile.IsError)
{
Console.WriteLine("Unable to access the RAW file using the RawFileReader class!");
return;
}

if (rawFile.IsError)
{
return;
}

if (rawFile.InAcquisition)
{
return;
}

rawFile.SelectInstrument(Device.MS, 1);

int firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
int lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;
List<int> scans = Enumerable.Range(firstScanNumber, lastScanNumber).ToList();

if (scans.Count > 0)
{
rawFile.WriteSpectrum(args[1], scans);
}

return;
}
catch (Exception ex)
{
Console.WriteLine("Error accessing RAWFileReader library! - " + ex.Message);
}
}

}
}

Loading

0 comments on commit 8e3ff2f

Please sign in to comment.