From b2d39f6144a0a0caa903077cc4cb9a200a89d1c6 Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Mon, 29 Jul 2024 16:24:14 -0400 Subject: [PATCH] PD-5064 AMBIGUOUS operon type --- common.cpp | 2 +- common.hpp | 8 ++++++++ stxtyper.cpp | 52 ++++++++++++++++++++++++++++++++++++++++++---------- version.txt | 2 +- 4 files changed, 52 insertions(+), 12 deletions(-) diff --git a/common.cpp b/common.cpp index b5d459f..dc5da19 100644 --- a/common.cpp +++ b/common.cpp @@ -4032,7 +4032,7 @@ int Application::run (int argc, } catch (const std::exception &e) { - errorExit ((e. what () + ifS (errno, string ("\n") + strerror (errno))). c_str ()); + errorExit ((ifS (errno, strerror (errno) + string ("\n")) + e. what ()). c_str ()); } diff --git a/common.hpp b/common.hpp index 2249cee..119bb32 100644 --- a/common.hpp +++ b/common.hpp @@ -1487,6 +1487,14 @@ inline string colorize (const string &s, return s; return Color::code (Color::white, true) + s + Color::code (); } + +inline string colorizeUrl (const string &s, + bool active) + { const string prefix ("https://"); + if (! active) + return prefix + s; + return Color::code (Color::blue, true) + prefix + s + Color::code (); + } class OColor diff --git a/stxtyper.cpp b/stxtyper.cpp index 176b838..5446144 100644 --- a/stxtyper.cpp +++ b/stxtyper.cpp @@ -32,7 +32,9 @@ * Dependencies: NCBI BLAST, gunzip (optional) * * Release changes: -* 1.0.21 07/15/2024 PD-5038 ----nucleotide_output +* 1.0.23 07/29/2024 PD-5064 AMBIGUOUS operon type +* 1.0.22 07/25/2024 First codon L|I|V -> M +* 1.0.21 07/15/2024 PD-5038 --nucleotide_output * 1.0.20 05/21/2024 PD-5002 {A|B}_reference_subtype * 1.0.19 03/26/2024 BlastAlignment::targetAlign is removed * 1.0.18 03/19/2024 PD-4910 Element symbol is _operon, Element name contains operon quality attribute" @@ -136,7 +138,7 @@ string stxType_reported_operon2elementName (const string &stxType_reported, struct BlastAlignment { - size_t length {0}, nident {0} // aa + size_t length {0}, nident {0}, positives {0} // aa , refStart {0}, refEnd {0}, refLen {0} , targetStart {0}, targetEnd {0}, targetLen {0}; // Positions are 0-based @@ -201,10 +203,27 @@ struct BlastAlignment length = targetSeq. size (); nident = 0; + positives = 0; QC_ASSERT (targetSeq. size () == refSeq. size ()); FFOR (size_t, i, targetSeq. size ()) if (targetSeq [i] == refSeq [i]) + { nident++; + positives++; + } + else + if ( targetSeq [i] == 'X' + || refSeq [i] == 'X' + ) + positives++; + if ( refStart == 0 + && charInSet (targetSeq [0], "LIV") + && refSeq [0] == 'M' + ) + { + nident++; + positives++; + } stxClass = stxType; if ( stxType == "2a" @@ -241,14 +260,16 @@ struct BlastAlignment return; QC_ASSERT (length); QC_ASSERT (nident); - QC_ASSERT (nident <= length); + QC_ASSERT (nident <= positives); + //QC_ASSERT (nident <= length); + QC_ASSERT (positives <= length); QC_ASSERT (targetStart < targetEnd); QC_ASSERT (targetEnd <= targetLen); QC_ASSERT (refStart < refEnd); QC_ASSERT (refEnd <= refLen); if (! frameshift) { - QC_ASSERT (nident <= refEnd - refStart); + QC_ASSERT (positives <= refEnd - refStart); QC_ASSERT (refEnd - refStart <= length); } QC_ASSERT (! targetName. empty ()); @@ -359,9 +380,12 @@ struct BlastAlignment refStart = prev. refStart; else refEnd = prev. refEnd; - length += prev. length; // Approximately - nident += prev. nident; // Approximately + // Approximately + length += prev. length; + nident += prev. nident; + positives += prev. positives; //targetAlign += prev. targetAlign; + // if (prev. stopCodon) stopCodon = true; frameshift = true; @@ -402,6 +426,8 @@ struct BlastAlignment s += string (len - refEnd, '-'); return s; } + bool ambig () const + { return nident < positives; } static bool frameshiftLess (const BlastAlignment* a, const BlastAlignment* b) { ASSERT (a); @@ -502,7 +528,7 @@ struct Operon string stxType (getStxType (verboseP)); const string standard ("COMPLETE"); const bool novel = al1->stxClass != al2->stxClass - || getIdentity () < stxClass2identity [al1->stxClass] // May be due to X's + || getIdentity () < stxClass2identity [al1->stxClass] || stxType. size () <= 1; const string operonType = getA () -> frameshift || getB () -> frameshift @@ -514,12 +540,14 @@ struct Operon || getB () -> truncated () ? "PARTIAL_CONTIG_END" : partial () - ? "PARTIAL" + ? "PARTIAL" // Complete ?? : getA () -> getExtended () || getB () -> getExtended () - ? "EXTENDED" + ? "EXTENDED" // Complete ?? : novel - ? standard + "_NOVEL" + ? ambig () + ? "AMBIGUOUS" + : standard + "_NOVEL" : standard; if (! verboseP) { @@ -650,6 +678,10 @@ struct Operon { return (getA () -> getRelCoverage () < 1.0 && ! getA () -> getExtended ()) || (getB () -> getRelCoverage () < 1.0 && ! getB () -> getExtended ()); } + bool ambig () const + { return getA () -> ambig () + || getB () -> ambig (); + } public: double getIdentity () const { return double (al1->nident + al2->nident) / double (al1->length/*refLen*/ + al2->length/*refLen*/); } diff --git a/version.txt b/version.txt index b112f91..154b9fc 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.21 +1.0.23