From 42a7556b4ce85a2bd578156b54a4816c207b751e Mon Sep 17 00:00:00 2001
From: Daniel Vaulot
Date: Tue, 26 Mar 2024 21:29:04 +0800
Subject: [PATCH] Update summarize_barrnap.py

If there is more than one gene for a given domain, retains the lowest e-value (case of full rRNA operon sequences).
---
 bin/summarize_barrnap.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bin/summarize_barrnap.py b/bin/summarize_barrnap.py
index c7690bbea..6585d1568 100755
--- a/bin/summarize_barrnap.py
+++ b/bin/summarize_barrnap.py
@@ -3,7 +3,8 @@
 # Takes a list of files with barrnap predictions (rrna.arc.gff, rrna.bac.gff, etc)
 # for ASV sequences, extracts evalues for each prediction and summarize the results
 # in a new file "summary.gff". Assumes that the same program/barrnap version is
-# used for all predictions.
+# used for all predictions. If there is more than one gene for a given domain,
+# retains the lowest e-value (case of full rRNA operon sequences).
 #
 import pandas as pd
 import sys
@@ -27,7 +28,8 @@
         method[asv] = rowparts[1]
         if asv not in evalues:
             evalues[asv] = dict()
-        evalues[asv][org] = rowparts[5]
+        if (org not in evalues[asv]) or (float(evalues[asv][org]) > float(rowparts[5])) :
+            evalues[asv][org] = rowparts[5]
     fh.close()
 
 # Write results