From 6d9ce4c2eb560ca696c70085eb2d59b2cd4bc336 Mon Sep 17 00:00:00 2001 From: yangkl96 Date: Wed, 17 Jul 2024 15:29:22 -0400 Subject: [PATCH] bump to 1.2.37. Separate figures for mass offset loess calibrations. Print out peptides used for calibration --- pom.xml | 2 +- src/main/java/Features/CalibrationFigure.java | 1 + src/main/java/Features/LoessUtilities.java | 15 +++- src/main/java/Features/MainClass.java | 2 +- src/main/java/Features/MzmlReader.java | 20 ++++- .../java/Features/PercolatorFormatter.java | 85 ++++++++++++++++++- src/main/java/Features/RTFunctions.java | 4 +- 7 files changed, 119 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 526d891..7e5e490 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.example MSBooster - 1.2.36 + 1.2.37 1.8 diff --git a/src/main/java/Features/CalibrationFigure.java b/src/main/java/Features/CalibrationFigure.java index 426f94d..d7e83b0 100644 --- a/src/main/java/Features/CalibrationFigure.java +++ b/src/main/java/Features/CalibrationFigure.java @@ -33,6 +33,7 @@ public class CalibrationFigure { String charge; public CalibrationFigure() {} + //TODO: repeat but with single entries at a time, with different outFile names public void plotFigure(MzmlReader mzml, String outFile, float opacity, HashMap massToData, HashMap> loessFunctions) throws IOException { diff --git a/src/main/java/Features/LoessUtilities.java b/src/main/java/Features/LoessUtilities.java index 7a2a6b7..3252243 100644 --- a/src/main/java/Features/LoessUtilities.java +++ b/src/main/java/Features/LoessUtilities.java @@ -71,8 +71,9 @@ private static int getPSMs(MzmlReader mzml, //utility for getBetas and LOESS //returns exp and pred RT arrays - public static HashMap getArrays(MzmlReader mzml, int regressionSize, String mode, int charge) + public static Object[] getArrays(MzmlReader mzml, int regressionSize, String mode, int charge) throws FileParsingException { + //returns HashMap for arrays, and HashMap> for peptides ArrayList expValues = new ArrayList<>(); ArrayList predValues = new ArrayList<>(); ArrayList eScores = new ArrayList<>(); //for sorting @@ -146,15 +147,18 @@ public static HashMap getArrays(MzmlReader mzml, int regress massesList.add(""); } + HashMap> peptideMap = new HashMap<>(); for (String mass : massesList) { ArrayList thisExpValues = new ArrayList<>(); ArrayList thisPredValues = new ArrayList<>(); ArrayList thisEscores = new ArrayList<>(); + ArrayList finalPeptides = new ArrayList<>(); //get PSMs specific to this mass if (mass.isEmpty()) { thisExpValues = expValues; thisPredValues = predValues; thisEscores = eScores; + finalPeptides = peptides; } else if (mass.equals("others")) { for (int i = 0; i < peptides.size(); i++) { boolean peptideContains = false; @@ -175,6 +179,7 @@ public static HashMap getArrays(MzmlReader mzml, int regress thisExpValues.add(expValues.get(i)); thisPredValues.add(predValues.get(i)); thisEscores.add(eScores.get(i)); + finalPeptides.add(peptides.get(i)); } } } else { @@ -185,6 +190,7 @@ public static HashMap getArrays(MzmlReader mzml, int regress thisExpValues.add(expValues.get(i)); thisPredValues.add(predValues.get(i)); thisEscores.add(eScores.get(i)); + finalPeptides.add(peptides.get(i)); break; } } @@ -231,11 +237,14 @@ public static HashMap getArrays(MzmlReader mzml, int regress int[] sortedIndices2 = Arrays.copyOfRange(sortedIndices, 0, regressionSize); double[][] thisValues = new double[2][regressionSize]; + ArrayList newFinalPeptides = new ArrayList<>(); for (int i = 0; i < regressionSize; i++) { int idx = sortedIndices2[i]; thisValues[0][i] = thisExpValues.get(idx); thisValues[1][i] = thisPredValues.get(idx); + newFinalPeptides.add(finalPeptides.get(idx)); } + finalPeptides = newFinalPeptides; massToDataMap.put(mass, thisValues); } else { if (mass.isEmpty()) { @@ -248,7 +257,9 @@ public static HashMap getArrays(MzmlReader mzml, int regress thisValues[1] = thisPredValues.stream().mapToDouble(i -> i).toArray(); massToDataMap.put(mass, thisValues); } + + peptideMap.put(mass, finalPeptides); } - return massToDataMap; + return new Object[] {massToDataMap, peptideMap}; } } diff --git a/src/main/java/Features/MainClass.java b/src/main/java/Features/MainClass.java index e26d5b4..1a47da9 100644 --- a/src/main/java/Features/MainClass.java +++ b/src/main/java/Features/MainClass.java @@ -41,7 +41,7 @@ public class MainClass { public static ScheduledThreadPoolExecutor executorService; public static void main(String[] args) throws Exception { Locale.setDefault(Locale.US); - printInfo("MSBooster v1.2.36"); + printInfo("MSBooster v1.2.37"); try { //accept command line inputs diff --git a/src/main/java/Features/MzmlReader.java b/src/main/java/Features/MzmlReader.java index 25b76b5..6f38dbb 100644 --- a/src/main/java/Features/MzmlReader.java +++ b/src/main/java/Features/MzmlReader.java @@ -65,6 +65,8 @@ public class MzmlReader { public ArrayList>> IMLOESS = new ArrayList<>(); public HashMap expAndPredRTs; public HashMap> expAndPredIMsHashMap = new HashMap<>(); + public HashMap> RTpeptides; + public HashMap> IMpeptides = new HashMap<>(); public List futureList = new ArrayList<>(Constants.numThreads); public MzmlReader(String filename) throws FileParsingException, ExecutionException, InterruptedException { @@ -806,7 +808,9 @@ public void setLOESS(int regressionSize, String bandwidth, int robustIters, Stri } if (mode.equals("RT")) { - expAndPredRTs = LoessUtilities.getArrays(this, regressionSize, mode, 0); + Object[] arraysAndPeptides = LoessUtilities.getArrays(this, regressionSize, mode, 0); + expAndPredRTs = (HashMap) arraysAndPeptides[0]; + RTpeptides = (HashMap>) arraysAndPeptides[1]; //repeat this process for each mass shift group for (String mass : masses) { @@ -870,8 +874,18 @@ public void setLOESS(int regressionSize, String bandwidth, int robustIters, Stri } } else if (mode.equals("IM")) { for (int charge = 1; charge < IMFunctions.numCharges + 1; charge++) { - HashMap expAndPredIMs = - LoessUtilities.getArrays(this, regressionSize, mode, charge); + Object[] arraysAndPeptides = LoessUtilities.getArrays(this, regressionSize, mode, charge); + HashMap expAndPredIMs = (HashMap) arraysAndPeptides[0]; + HashMap> peptideMap = + (HashMap>) arraysAndPeptides[1]; + for (Map.Entry> entry : peptideMap.entrySet()) { + if (!IMpeptides.containsKey(entry.getKey())) { + IMpeptides.put(entry.getKey(), new ArrayList<>()); + } + ArrayList peptides = IMpeptides.get(entry.getKey()); + peptides.addAll(entry.getValue()); + IMpeptides.put(entry.getKey(), peptides); + } HashMap> IMLOESSmap = new HashMap<>(); for (String mass : masses) { diff --git a/src/main/java/Features/PercolatorFormatter.java b/src/main/java/Features/PercolatorFormatter.java index 2e9a9f7..0f5f63c 100644 --- a/src/main/java/Features/PercolatorFormatter.java +++ b/src/main/java/Features/PercolatorFormatter.java @@ -23,10 +23,13 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeMap; import com.google.common.collect.TreeRangeMap; +import kotlin.jvm.functions.Function1; import org.apache.commons.lang3.ArrayUtils; import umich.ms.fileio.exceptions.FileParsingException; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.sql.SQLException; import java.util.*; @@ -518,8 +521,45 @@ public static void editPin(PinMzmlMatcher pmMatcher, String[] features, String o } } if (plot) { - new RTCalibrationFigure(mzml, pinFiles[i].getCanonicalPath(), 0.2f, + CalibrationFigure cf = new RTCalibrationFigure(mzml, pinFiles[i].getCanonicalPath(), 0.2f, mzml.expAndPredRTs, mzml.RTLOESS); + + //individual figures by mass + //separate figures + for (String mass : mzml.expAndPredRTs.keySet()) { + if (!mass.isEmpty()) { + HashMap miniMassToData = new HashMap<>(); + miniMassToData.put(mass, mzml.expAndPredRTs.get(mass)); + HashMap> miniLoessFunctions = new HashMap<>(); + miniLoessFunctions.put(mass, mzml.RTLOESS.get(mass)); + new RTCalibrationFigure(mzml, + pinFiles[i].getCanonicalPath().substring(0, + pinFiles[i].getCanonicalPath().length() - 4) + "_" + mass + ".pin", + 0.2f, miniMassToData, miniLoessFunctions); + } + } + + //write peptides used + File f = new File(pinFiles[i].getCanonicalPath()); + String calibrationPeptideFilePathBase = + f.getParent() + File.separator + "MSBooster_plots" + File.separator + cf.folderString + + File.separator + f.getName().substring(0, f.getName().length() - 4) + + "_RTcalibrationPeptides"; + for (Map.Entry> entry : mzml.RTpeptides.entrySet()) { + String calibrationPeptideFilePath = calibrationPeptideFilePathBase; + if (!entry.getKey().isEmpty()) { + calibrationPeptideFilePath += "_" + entry.getKey(); + } + try (BufferedWriter writer = new BufferedWriter( + new FileWriter(calibrationPeptideFilePath + ".txt"))) { + for (String peptide : entry.getValue()) { + writer.write(peptide); + writer.newLine(); + } + } catch (IOException e) { + e.printStackTrace(); + } + } } } if (featuresList.contains("deltaRTlinear")) { @@ -564,6 +604,8 @@ public static void editPin(PinMzmlMatcher pmMatcher, String[] features, String o mzml.setLOESS(Constants.imLoessRegressionSize, Constants.loessBandwidth, Constants.robustIters, "IM"); mzml.predictIMLOESS(executorService); + CalibrationFigure cf = null; + boolean writePeptides = false; for (int charge : mzml.expAndPredIMsHashMap.keySet()) { boolean plot = false; for (Map.Entry entry : mzml.expAndPredIMsHashMap.get(charge).entrySet()) { @@ -573,8 +615,47 @@ public static void editPin(PinMzmlMatcher pmMatcher, String[] features, String o } } if (plot) { - new IMCalibrationFigure(mzml, pinFiles[i].getCanonicalPath(), 0.2f, + cf = new IMCalibrationFigure(mzml, pinFiles[i].getCanonicalPath(), 0.2f, mzml.expAndPredIMsHashMap.get(charge), mzml.IMLOESS.get(charge - 1), charge); + writePeptides = true; + + //individual figures by mass + //separate figures + for (String mass : mzml.expAndPredIMsHashMap.get(charge).keySet()) { + if (!mass.isEmpty()) { + HashMap miniMassToData = new HashMap<>(); + miniMassToData.put(mass, mzml.expAndPredIMsHashMap.get(charge).get(mass)); + HashMap> miniLoessFunctions = new HashMap<>(); + miniLoessFunctions.put(mass, mzml.IMLOESS.get(charge - 1).get(mass)); + new IMCalibrationFigure(mzml, + pinFiles[i].getCanonicalPath().substring(0, + pinFiles[i].getCanonicalPath().length() - 4) + "_" + mass + ".pin", + 0.2f, miniMassToData, miniLoessFunctions, charge); + } + } + } + } + if (writePeptides) { + //write peptides used + File f = new File(pinFiles[i].getCanonicalPath()); + String calibrationPeptideFilePathBase = + f.getParent() + File.separator + "MSBooster_plots" + File.separator + cf.folderString + + File.separator + f.getName().substring(0, f.getName().length() - 4) + + "_IMcalibrationPeptides"; + for (Map.Entry> entry : mzml.IMpeptides.entrySet()) { + String calibrationPeptideFilePath = calibrationPeptideFilePathBase; + if (!entry.getKey().isEmpty()) { + calibrationPeptideFilePath += "_" + entry.getKey(); + } + try (BufferedWriter writer = new BufferedWriter( + new FileWriter(calibrationPeptideFilePath + ".txt"))) { + for (String peptide : entry.getValue()) { + writer.write(peptide); + writer.newLine(); + } + } catch (IOException e) { + e.printStackTrace(); + } } } } diff --git a/src/main/java/Features/RTFunctions.java b/src/main/java/Features/RTFunctions.java index 2130ecc..86c6e0b 100644 --- a/src/main/java/Features/RTFunctions.java +++ b/src/main/java/Features/RTFunctions.java @@ -55,7 +55,9 @@ public static float normalizeRT(float[] betas, float expRT) { //assumes peptide objects already set //TODO: only supporting getting PSMs for supported PTMs public static float[] getBetas(MzmlReader mzml, int RTregressionSize) throws FileParsingException { - double[][] RTs = LoessUtilities.getArrays(mzml, RTregressionSize, "RT", 0).get(""); + HashMap arrays = (HashMap) + LoessUtilities.getArrays(mzml, RTregressionSize, "RT", 0)[0]; + double[][] RTs = arrays.get(""); return StatMethods.linearRegression(RTs[0], RTs[1]); }