nf-core · d4straub · Mar 26, 2024 · Mar 26, 2024 · Mar 26, 2024 · Mar 26, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#717](https://github.com/nf-core/ampliseq/pull/717) - Fix edge case for sorting file names by using radix method
 - [#718](https://github.com/nf-core/ampliseq/pull/718) - Require a minimum sequence length of 50bp for taxonomic classifcation after using ITSx
 - [#721](https://github.com/nf-core/ampliseq/pull/721) - Fix error `unknown recognition error type: groovyjarjarantlr4.v4.runtime.LexerNoViableAltException` caused by a missing `\` in nf-core module `pigz/uncompress` (which had no consequences but was confusing)
+- [#722](https://github.com/nf-core/ampliseq/pull/722) - When barrnap detects several genes for the same domain, retain the lowest e-value
 
 ### `Dependencies`
 

diff --git a/bin/summarize_barrnap.py b/bin/summarize_barrnap.py
@@ -3,7 +3,8 @@
 # Takes a list of files with barrnap predictions (rrna.arc.gff, rrna.bac.gff, etc)
 # for ASV sequences, extracts evalues for each prediction and summarize the results
 # in a new file "summary.gff". Assumes that the same program/barrnap version is
-# used for all predictions.
+# used for all predictions. If more than one gene is predicted for a given
+# domain, only the lowest e-value is retained (e.g. full rRNA operon sequences).
 
 # import pandas as pd
 import sys
@@ -27,7 +28,8 @@
         method[asv] = rowparts[1]
         if asv not in evalues:
             evalues[asv] = dict()
-        evalues[asv][org] = rowparts[5]
+        if (org not in evalues[asv]) or (float(evalues[asv][org]) > float(rowparts[5])):
+           evalues[asv][org] = rowparts[5]
     fh.close()
 
 # Write results