From d7b183ac6a5b6e35e20c0d1dea99322c5054a3e9 Mon Sep 17 00:00:00 2001 From: julianu Date: Tue, 16 Jul 2024 09:52:25 +0200 Subject: [PATCH 1/2] set number of maximal number threads by the CLI --- pom.xml | 2 +- src/main/java/de/mpc/pia/PIACli.java | 48 +++++++++++++++++-- .../de/mpc/pia/modeller/ProteinModeller.java | 16 ++++++- 3 files changed, 59 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index 6f3c8e7..66e9e83 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ de.mpc.pia pia - 1.5.4 + 1.5.5 PIA - Protein Inference Algorithms https://github.com/mpc-bioinformatics/pia diff --git a/src/main/java/de/mpc/pia/PIACli.java b/src/main/java/de/mpc/pia/PIACli.java index f6b1342..ab7019e 100755 --- a/src/main/java/de/mpc/pia/PIACli.java +++ b/src/main/java/de/mpc/pia/PIACli.java @@ -42,6 +42,11 @@ public class PIACli implements Runnable{ @Option(names = { "-o", "--outfile" }, description = "output file name (e.g. intermediate PIA file)") private String outfile; + + @Option(names = { "-t", "--threads" }, + description = "maximum number of used threads for compilation (0 for use all)", + defaultValue = "0") + private String threads; @Option(names = { "-n", "--name" }, description = "name of the compilation", @@ -89,12 +94,16 @@ public static void main(String[] args) { private void processCompile() { PIACompiler piaCompiler = new PIASimpleCompiler(); + int iThreads = parseThreads(); + LOGGER.debug("Compiler uses {} CPUs", iThreads); + piaCompiler.setNrThreads(iThreads); + // parse the command line arguments try { if (!parseCommandLineInfiles(piaCompiler)) { return; } - + piaCompiler.buildClusterList(); piaCompiler.buildIntermediateStructure(); @@ -108,6 +117,24 @@ private void processCompile() { } } + /** + * Parses the threads from the CLI into an integer value. 
+ * + * @return integer value of CLI argument threads, 0 if errored (stands for use all CPUs) + */ + private int parseThreads() { + int iThreads = 0; + if (!threads.equals("0")) { + try { + iThreads = Integer.parseInt(threads); + } catch (NumberFormatException e) { + LOGGER.error("Could not parse the maximal number of threads, using all available CPUs"); + iThreads = 0; + } + } + return iThreads; + } + /** * Parses the files given from the command line in the String array into the * given {@link PIACompiler}. The files may also contain the name and @@ -193,7 +220,8 @@ private void processAnalysis() { } if (filesExist) { - processPIAAnalysis(infiles[0], infiles[1]); + int iThreads = parseThreads(); + processPIAAnalysis(infiles[0], infiles[1], iThreads); } } } @@ -205,7 +233,7 @@ private void processAnalysis() { * @param jsonFileName * @param piaFileName */ - public static boolean processPIAAnalysis(String jsonFileName, String piaFileName) { + public static boolean processPIAAnalysis(String jsonFileName, String piaFileName, int threads) { PIAModeller modeller = new PIAModeller(piaFileName); JsonAnalysis json = JsonAnalysis.readFromFile(new File(jsonFileName)); @@ -234,7 +262,7 @@ public static boolean processPIAAnalysis(String jsonFileName, String piaFileName if (processOK && json.isInfereProteins()) { // protein level - processOK = modeller.getProteinModeller().executeProteinOperations(json); + processOK = modeller.getProteinModeller().executeProteinOperations(json, threads); } if (processOK && (json.getProteinExportFile() != null)) { @@ -257,6 +285,18 @@ public static boolean processPIAAnalysis(String jsonFileName, String piaFileName return processOK; } + + /** + * Performs the actual PIA analysis parsing the JSON file for the PIA + * intermediate file. 
+ * + * @param jsonFileName + * @param piaFileName + */ + public static boolean processPIAAnalysis(String jsonFileName, String piaFileName) { + return processPIAAnalysis(jsonFileName, piaFileName, 0); + } + /** * {@link IVersionProvider} implementation that returns version information. */ diff --git a/src/main/java/de/mpc/pia/modeller/ProteinModeller.java b/src/main/java/de/mpc/pia/modeller/ProteinModeller.java index 7f0fb0d..ad4879a 100755 --- a/src/main/java/de/mpc/pia/modeller/ProteinModeller.java +++ b/src/main/java/de/mpc/pia/modeller/ProteinModeller.java @@ -509,17 +509,19 @@ public boolean addInferenceFilter(AbstractFilter newFilter) { *

* If a required setting is not given, the default value is used. */ - public boolean executeProteinOperations(JsonAnalysis json) { + public boolean executeProteinOperations(JsonAnalysis json, int threads) { boolean allOk = true; AbstractProteinInference proteinInference = ProteinInferenceFactory.createInstanceOf(json.getInferenceMethod()); - + if (proteinInference == null) { LOGGER.error("Could not create inference method '{}'", json.getInferenceMethod()); allOk = false; } else { LOGGER.info("selected inference method: {}", proteinInference.getName()); + proteinInference.setAllowedThreads(threads); + LOGGER.debug("Protein inference using {} threads (0=all available)", proteinInference.getAllowedThreads()); } if (allOk) { @@ -558,4 +560,14 @@ public boolean executeProteinOperations(JsonAnalysis json) { return allOk; } + + + /** + * Execute analysis on protein level, getting the settings from JSON, using all available CPUs + *

+ * If a required setting is not given, the default value is used. + */ + public boolean executeProteinOperations(JsonAnalysis json) { + return executeProteinOperations(json, 0); + } } \ No newline at end of file From 6838dd6e43418d9931f293ab1dc998deef685160 Mon Sep 17 00:00:00 2001 From: julianu Date: Tue, 16 Jul 2024 10:53:58 +0200 Subject: [PATCH 2/2] treating "scan start time" and "retention time" equivalently for PSMs --- .../compiler/parser/MzIdentMLFileParser.java | 26 +++++++++++++++++-- .../de/mpc/pia/tools/OntologyConstants.java | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java b/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java index 9d8c245..7983f7f 100755 --- a/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java +++ b/src/main/java/de/mpc/pia/intermediate/compiler/parser/MzIdentMLFileParser.java @@ -520,11 +520,33 @@ private static Double parseRetentionTime(SpectrumIdentificationResult specIdResu Double rt = null; // get the "scan start time" cvParams - List scanStartCvParams = specIdResult.getCvParam().stream() + List rtCvParams = specIdResult.getCvParam().stream() .filter(cvParam -> cvParam.getAccession().equals(OntologyConstants.SCAN_START_TIME.getPsiAccession())) .toList(); + rt = extractRTfromCVs(rtCvParams); + + if (rt == null) { + // did not find the RT with this param, try "retention time" + rtCvParams = specIdResult.getCvParam().stream() + .filter(cvParam -> cvParam.getAccession().equals(OntologyConstants.RETENTION_TIME.getPsiAccession())) + .toList(); + rt = extractRTfromCVs(rtCvParams); + } + + return rt; + } + + + /** + * Extract scan time in seconds from the retention time / scan start time CV params. 
+ * + * @param scanTimeCvParams + * @return + */ + private static Double extractRTfromCVs(List scanTimeCvParams) { + Double rt = null; - for (CvParam cvParam : scanStartCvParams) { + for (CvParam cvParam : scanTimeCvParams) { try { rt = Double.parseDouble(cvParam.getValue()); diff --git a/src/main/java/de/mpc/pia/tools/OntologyConstants.java b/src/main/java/de/mpc/pia/tools/OntologyConstants.java index be74ea8..49bcf99 100755 --- a/src/main/java/de/mpc/pia/tools/OntologyConstants.java +++ b/src/main/java/de/mpc/pia/tools/OntologyConstants.java @@ -27,6 +27,7 @@ public enum OntologyConstants { SEARCH_ENGINE_PSM_SCORE("search engine specific score for PSMs", "MS:1001143"), SPECTRUM_TITLE("spectrum title", "MS:1000796"), SCAN_START_TIME("scan start time", "MS:1000016"), + RETENTION_TIME("retention time", "MS:1000894"), SCAN_NUMBERS("scan number(s)", "MS:1001115"), DELTA_MZ("delta m/z", "MS:1001975"), CLEAVAGE_AGENT_NAME("cleavage agent name", "MS:1001045"),