From d7b183ac6a5b6e35e20c0d1dea99322c5054a3e9 Mon Sep 17 00:00:00 2001
From: julianu <julian.uszkoreit@rub.de>
Date: Tue, 16 Jul 2024 09:52:25 +0200
Subject: [PATCH 1/2] set maximal number of threads by the CLI

---
 pom.xml                                       |  2 +-
 src/main/java/de/mpc/pia/PIACli.java          | 48 +++++++++++++++++--
 .../de/mpc/pia/modeller/ProteinModeller.java  | 16 ++++++-
 3 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/pom.xml b/pom.xml
index 6f3c8e7..66e9e83 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
 
 	<groupId>de.mpc.pia</groupId>
 	<artifactId>pia</artifactId>
-	<version>1.5.4</version>
+	<version>1.5.5</version>
 	<name>PIA - Protein Inference Algorithms</name>
 	<url>https://github.com/mpc-bioinformatics/pia</url>
 
diff --git a/src/main/java/de/mpc/pia/PIACli.java b/src/main/java/de/mpc/pia/PIACli.java
index f6b1342..ab7019e 100755
--- a/src/main/java/de/mpc/pia/PIACli.java
+++ b/src/main/java/de/mpc/pia/PIACli.java
@@ -42,6 +42,11 @@ public class PIACli implements Runnable{
 	@Option(names = { "-o", "--outfile" },
 			description = "output file name (e.g. intermediate PIA file)") 
 	private String outfile;
+
+	@Option(names = { "-t", "--threads" },
+			description = "maximum number of used threads for compilation (0 for use all)",
+			defaultValue = "0") 
+	private String threads;
 	
 	@Option(names = { "-n", "--name" },
 			description = "name of the compilation",
@@ -89,12 +94,16 @@ public static void main(String[] args) {
 	private void processCompile() {
 		PIACompiler piaCompiler = new PIASimpleCompiler();
 		
+		int iThreads = parseThreads();
+		LOGGER.debug("Compiler uses {} CPUs", iThreads);
+		piaCompiler.setNrThreads(iThreads);
+
 		// parse the command line arguments
 		try {
 			if (!parseCommandLineInfiles(piaCompiler)) {
 				return;
 			}
-			
+
 			piaCompiler.buildClusterList();
 			piaCompiler.buildIntermediateStructure();
 			
@@ -108,6 +117,24 @@ private void processCompile() {
 		}
 	}
 
+	/**
+	 * Parses the threads from the CLI into an integer value.
+	 * A missing value maps to 0; a non-numeric or negative value is logged and maps to 0.
+	 * @return non-negative thread count, 0 stands for use all CPUs
+	 */
+	private int parseThreads() {
+		int iThreads = 0;
+		try {
+			iThreads = (threads == null) ? 0 : Integer.parseInt(threads.trim());
+		} catch (NumberFormatException e) {
+			LOGGER.error("Could not parse the maximal number of threads, using all available CPUs");
+		}
+		if (iThreads < 0) {
+			LOGGER.error("Negative number of threads ({}), using all available CPUs", iThreads);
+		}
+		return Math.max(iThreads, 0);
+	}
+
 	/**
 	 * Parses the files given from the command line in the String array into the
 	 * given {@link PIACompiler}. The files may also contain the name and
@@ -193,7 +220,8 @@ private void processAnalysis() {
 			}
 
 			if (filesExist) {
-				processPIAAnalysis(infiles[0], infiles[1]);
+				int iThreads = parseThreads();
+				processPIAAnalysis(infiles[0], infiles[1], iThreads);
 			}
 		}
 	}
@@ -205,7 +233,7 @@ private void processAnalysis() {
 	 * @param jsonFileName
 	 * @param piaFileName
 	 */
-	public static boolean processPIAAnalysis(String jsonFileName, String piaFileName) {
+	public static boolean processPIAAnalysis(String jsonFileName, String piaFileName, int threads) {
 		PIAModeller modeller = new PIAModeller(piaFileName);
 		JsonAnalysis json = JsonAnalysis.readFromFile(new File(jsonFileName));
 
@@ -234,7 +262,7 @@ public static boolean processPIAAnalysis(String jsonFileName, String piaFileName
 
 		if (processOK && json.isInfereProteins()) {
 			// protein level
-			processOK = modeller.getProteinModeller().executeProteinOperations(json);
+			processOK = modeller.getProteinModeller().executeProteinOperations(json, threads);
 		}
 
 		if (processOK && (json.getProteinExportFile() != null)) {
@@ -257,6 +285,18 @@ public static boolean processPIAAnalysis(String jsonFileName, String piaFileName
 		return processOK;
 	}
 
+
+	/**
+	 * Performs the actual PIA analysis parsing the JSON file for the PIA
+	 * intermediate file; delegates with a thread count of 0 (0 = use all).
+	 * @param jsonFileName name of the JSON analysis file, forwarded unchanged
+	 * @param piaFileName name of the PIA intermediate file, forwarded unchanged
+	 * @return the result of the three-argument overload
+	 */
+	public static boolean processPIAAnalysis(String jsonFileName, String piaFileName) {
+		return processPIAAnalysis(jsonFileName, piaFileName, 0);
+	}
+
 	/**
 	 * {@link IVersionProvider} implementation that returns version information.
 	 */
diff --git a/src/main/java/de/mpc/pia/modeller/ProteinModeller.java b/src/main/java/de/mpc/pia/modeller/ProteinModeller.java
index 7f0fb0d..ad4879a 100755
--- a/src/main/java/de/mpc/pia/modeller/ProteinModeller.java
+++ b/src/main/java/de/mpc/pia/modeller/ProteinModeller.java
@@ -509,17 +509,19 @@ public boolean addInferenceFilter(AbstractFilter newFilter) {
      * <p>
      * If a required setting is not given, the default value is used.
      */
-    public boolean executeProteinOperations(JsonAnalysis json) {
+    public boolean executeProteinOperations(JsonAnalysis json, int threads) {
     	boolean allOk = true;
     	
     	AbstractProteinInference proteinInference =
                 ProteinInferenceFactory.createInstanceOf(json.getInferenceMethod());
-    	
+        
     	if (proteinInference == null) {
     		LOGGER.error("Could not create inference method '{}'", json.getInferenceMethod());
     		allOk = false;
     	} else {
     		LOGGER.info("selected inference method: {}", proteinInference.getName());
+            proteinInference.setAllowedThreads(threads);
+            LOGGER.debug("Protein inference using {} threads (0=all available)", proteinInference.getAllowedThreads());
     	}
     	
     	if (allOk) {
@@ -558,4 +560,14 @@ public boolean executeProteinOperations(JsonAnalysis json) {
     	
     	return allOk;
     }
+
+
+    /**
+     * Execute analysis on protein level, getting the settings from JSON, using all available CPUs
+     * (delegates with threads = 0). If a required setting is not given, the default value is used.
+     * @return the result of {@link #executeProteinOperations(JsonAnalysis, int)}
+     */
+    public boolean executeProteinOperations(JsonAnalysis json) {
+        return executeProteinOperations(json, 0);
+    }
 }
\ No newline at end of file

From 6838dd6e43418d9931f293ab1dc998deef685160 Mon Sep 17 00:00:00 2001
From: julianu <julian.uszkoreit@rub.de>
Date: Tue, 16 Jul 2024 10:53:58 +0200
Subject: [PATCH 2/2] treating "scan start time" and "retention time"
 equivalently for PSMs

---
 .../compiler/parser/MzIdentMLFileParser.java  | 26 +++++++++++++++++--
 .../de/mpc/pia/tools/OntologyConstants.java   |  1 +
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java b/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java
index 9d8c245..7983f7f 100755
--- a/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java
+++ b/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java
@@ -520,11 +520,33 @@ private static Double parseRetentionTime(SpectrumIdentificationResult specIdResu
         Double rt = null;
 
         // get the "scan start time" cvParams
-        List<CvParam> scanStartCvParams = specIdResult.getCvParam().stream()
+        List<CvParam> rtCvParams = specIdResult.getCvParam().stream()
             .filter(cvParam -> cvParam.getAccession().equals(OntologyConstants.SCAN_START_TIME.getPsiAccession()))
             .toList();
+        rt = extractRTfromCVs(rtCvParams);
+
+        if (rt == null) {
+            // did not find the RT with this param, try "retention time"
+            rtCvParams = specIdResult.getCvParam().stream()
+                .filter(cvParam -> cvParam.getAccession().equals(OntologyConstants.RETENTION_TIME.getPsiAccession()))
+                .toList();
+                rt = extractRTfromCVs(rtCvParams);
+        }
+
+        return rt;
+    }
+
+
+    /**
+     * Extract scan time in seconds from the retention time / scan start time CV params.
+     * 
+     * @param scanTimeCvParams
+     * @return
+     */
+    private static Double extractRTfromCVs(List<CvParam> scanTimeCvParams) {
+        Double rt = null;
 
-        for (CvParam cvParam : scanStartCvParams) {
+        for (CvParam cvParam : scanTimeCvParams) {
             try {
                 rt = Double.parseDouble(cvParam.getValue());
 
diff --git a/src/main/java/de/mpc/pia/tools/OntologyConstants.java b/src/main/java/de/mpc/pia/tools/OntologyConstants.java
index be74ea8..49bcf99 100755
--- a/src/main/java/de/mpc/pia/tools/OntologyConstants.java
+++ b/src/main/java/de/mpc/pia/tools/OntologyConstants.java
@@ -27,6 +27,7 @@ public enum OntologyConstants {
     SEARCH_ENGINE_PSM_SCORE("search engine specific score for PSMs", "MS:1001143"),
     SPECTRUM_TITLE("spectrum title", "MS:1000796"),
     SCAN_START_TIME("scan start time", "MS:1000016"),
+    RETENTION_TIME("retention time", "MS:1000894"),
     SCAN_NUMBERS("scan number(s)", "MS:1001115"),
     DELTA_MZ("delta m/z", "MS:1001975"),
     CLEAVAGE_AGENT_NAME("cleavage agent name", "MS:1001045"),