From b845df0808194a834471b382ae2b7c263024d009 Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Tue, 22 Oct 2024 17:09:18 -0400 Subject: [PATCH 1/3] PD-5085 Change column "Element length" to "Target length" --- amrfinder_columns.hpp | 43 ++- common.cpp | 183 +++++++++---- common.hpp | 382 +++++++++++++++++---------- common.inc | 2 +- stxtyper.cpp | 3 +- test/amrfinder_integration.expected | 2 +- test/amrfinder_integration2.expected | 2 +- tsv.hpp | 1 + version.txt | 2 +- 9 files changed, 433 insertions(+), 187 deletions(-) diff --git a/amrfinder_columns.hpp b/amrfinder_columns.hpp index 5501b6f..cf25a92 100644 --- a/amrfinder_columns.hpp +++ b/amrfinder_columns.hpp @@ -1,4 +1,38 @@ -// AMRFinderPlus column names +// columns.hpp + +/*=========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Vyacheslav Brover +* +* File Description: +* AMRFinderPlus column names +* +*/ + + + // PD-5085 constexpr const char* prot_colName = "Protein id"; // PD-2534 @@ -18,7 +52,7 @@ constexpr const char* class_colName = "Class"; constexpr const char* subclass_colName = "Subclass"; // constexpr const char* method_colName = "Method"; -constexpr const char* targetLen_colName = "Element length"; // was: "Target length" ?? +constexpr const char* targetLen_colName = "Target length"; // was: "Element length" (temporarily) constexpr const char* refLen_colName = "Reference sequence length"; constexpr const char* refCov_colName = "% Coverage of reference"; constexpr const char* refIdent_colName = "% Identity to reference"; @@ -28,3 +62,8 @@ constexpr const char* closestRefName_colName = "Closest reference name"; constexpr const char* hmmAccession_colName = "HMM accession"; constexpr const char* hmmDescr_colName = "HMM description"; constexpr const char* hierarchyNode_colName = "Hierarchy node"; + + +// PD-5155 +constexpr const char* fusion_infix = "::"; // was: "/" + \ No newline at end of file diff --git a/common.cpp b/common.cpp index b53b39f..f77af4c 100644 --- a/common.cpp +++ b/common.cpp @@ -297,7 +297,7 @@ string getStack () char** strings = backtrace_symbols (buffer, nptrs); if (strings /*&& ! which ("addr2line"). empty ()*/) { - FOR_START (int, i, 1, nptrs) + FOR_REV_END (int, i, 1, nptrs) s += string (strings [i]) + "\n"; s += "Use: addr2line -f -C -e " + programArgs [0] + " -a
"; //free (strings); @@ -320,6 +320,10 @@ string getStack () +//bool InputError::on = false; + + + // @@ -340,6 +344,15 @@ bool isRedirected (const ostream &os) +void sleepNano (long nanoSec) +{ + const timespec request = {0, nanoSec}; + timespec remaining; + EXEC_ASSERT (! nanosleep (& request, & remaining)); +} + + + void beep () { if (getEnv ("SHLVL") != "1") @@ -364,7 +377,7 @@ void Chronometer::start () { if (! on ()) return; - if (startTime != noclock) + if (started ()) throw runtime_error (FUNC "Chronometer \"" + name + "\" is not stopped"); startTime = clock (); } @@ -375,7 +388,7 @@ void Chronometer::stop () { if (! on ()) return; - if (startTime == noclock) + if (! started ()) throw runtime_error (FUNC "Chronometer \"" + name + "\" is not started"); time += clock () - startTime; startTime = noclock; @@ -533,6 +546,23 @@ string nonPrintable2str (char c) +string to_url (const string &s) +{ + string url; + for (const char c : s) + if ( isLetter (c) + || isDigit (c) + || c == '_' + ) + url += c; + else + url += "%" + uchar2hex ((uchar) c); + + return url; +} + + + bool isRight (const string &s, const string &right) { @@ -1241,7 +1271,7 @@ void removeDirectory (const string &dirName) { case Filetype::link: if (unlink (name. c_str ())) - throw runtime_error ("cannot unlink " + strQuote (name)); + throw logic_error ("cannot unlink " + strQuote (name)); break; case Filetype::dir: removeDirectory (name); @@ -1250,17 +1280,45 @@ void removeDirectory (const string &dirName) removeFile (name); break; default: - throw runtime_error ("Cannot remove directory item " + strQuote (name) + " of type " + strQuote (filetype2name (t))); + throw logic_error ("Cannot remove directory item " + strQuote (name) + " of type " + strQuote (filetype2name (t))); } } if (rmdir (dirName. c_str ())) - throw runtime_error ("Cannot remove directory " + strQuote (dirName)); + throw logic_error ("Cannot remove directory " + strQuote (dirName)); +} + + + +string makeTempDir () +{ + string tmpDir (getEnv ("TMPDIR")); + if (tmpDir. empty ()) + tmpDir = "/tmp"; + + string tmp = tmpDir + "/" + programName + ".XXXXXX"; + if (! mkdtemp (var_cast (tmp. c_str ()))) + throw runtime_error ("Error creating a temporary directory in " + tmpDir); + if (tmp. empty ()) + throw runtime_error ("Cannot create a temporary directory in " + tmpDir); + + { + const string testFName (tmp + "/test"); + { + ofstream f (testFName); + f << "abc" << endl; + if (! f. good ()) + throw runtime_error (tmpDir + " is full, make space there or use environment variable TMPDIR to change location for temporary files"); + } + removeFile (testFName); + } + + return tmp; } void concatTextDir (const string &inDirName, - const string &outFName) + const string &outFName) { RawDirItemGenerator dig (0, inDirName, false); OFStream outF (outFName); @@ -2058,6 +2116,22 @@ void Progress::report () const // TextPos +string TextPos::str () const +{ + if (lineNum == -1) + return noString; + return "line " + to_string (lineNum + 1) + ", " + + (eol () + ? "end of line" + : last () + ? "last position" + : "pos. " + to_string (charNum + 1) + ) + + ": "; +} + + + void TextPos::inc (bool eol_arg) { if (eol ()) @@ -2315,7 +2389,7 @@ void Token::readInput (CharInput &in, qc (); if (verbose ()) - cout << type2str (type) << ' ' << *this << ' ' << tp. str () << endl; + cout << tp. str () << type2str (type) << ' ' << *this << endl; } @@ -2470,13 +2544,17 @@ Token TokenInput::get () const Token last_ (last); last = Token (); if (! last_. empty ()) + { + tp = last_. tp; return last_; + } for (;;) { Token t (ci, dashInName, consecutiveQuotesInText); if (t. empty ()) break; + tp = t. tp; if (! t. isDelimiter (commentStart)) return t; ci. getLine (); @@ -2728,6 +2806,31 @@ char TokenInput::getNextChar (bool unget) +// BraceInput + +void BraceInput::skipComment () +{ + get ('{'); + size_t n = 1; + for (;;) + { + const Token t (get ()); + if (t. isDelimiter ('{')) + n++; + else if (t. isDelimiter ('}')) + { + ASSERT (n); + n--; + if (! n) + break; + } + } +//get (endChar); +} + + + + // IFStream IFStream::IFStream (const string &pathName) @@ -3235,8 +3338,8 @@ struct RawDirItemGenerator::Imp RawDirItemGenerator::RawDirItemGenerator (size_t progress_displayPeriod, - const string& dirName_arg, - bool large_arg) + const string& dirName_arg, + bool large_arg) : ItemGenerator (0, progress_displayPeriod) , dirName (dirName_arg) , imp (new Imp (dirName_arg)) @@ -4030,9 +4133,24 @@ int Application::run (int argc, jRoot. reset (); } } + catch (const std::range_error &e) { errorExitStr (string ("Range error: ") + e. what ()); } + catch (const std::overflow_error &e) { errorExitStr (string ("Overflow error: ") + e. what ()); } + catch (const std::underflow_error &e) { errorExitStr (string ("Underflow error: ") + e. what ()); } + catch (const std::system_error &e) { errorExitStr (string ("System error: ") + e. what ()); } + catch (const std::runtime_error &e) + { + beep (); + ostream* os = logPtr ? logPtr : & cerr; + { + const OColor oc (*os, Color::red, true, true); + *os << error_caption; + } + *os << endl << e. what () << endl; + exit (1); + } catch (const std::exception &e) { - errorExit ((ifS (errno, strerror (errno) + string ("\n")) + e. what ()). c_str ()); + errorExitStr (ifS (errno, strerror (errno) + string ("\n")) + e. what ()); } @@ -4046,7 +4164,7 @@ int Application::run (int argc, ShellApplication::~ShellApplication () { - if (tmpCreated && ! logPtr) + if (! tmp. empty () && ! logPtr) removeDirectory (tmp); if (startTime) @@ -4064,16 +4182,6 @@ void ShellApplication::initEnvironment () ASSERT (tmp. empty ()); ASSERT (! programArgs. empty ()); - // tmp - if (useTmp) - { - string s (getEnv ("TMPDIR")); - if (s. empty ()) - tmp = "/tmp"; - else - tmp = std::move (s); - } - // execDir, programName execDir = getProgramDirName (); if (execDir. empty ()) @@ -4097,30 +4205,9 @@ void ShellApplication::initEnvironment () void ShellApplication::initVar () { - ASSERT (! tmpCreated); - + ASSERT (tmp. empty ()); if (useTmp) - { - const string tmpDir (tmp); - tmp += "/" + programName + ".XXXXXX"; - if (! mkdtemp (var_cast (tmp. c_str ()))) - throw runtime_error ("Error creating a temporary directory in " + tmpDir); - if (tmp. empty ()) - throw runtime_error ("Cannot create a temporary directory in " + tmpDir); - - { - const string testFName (tmp + "/test"); - { - ofstream f (testFName); - f << "abc" << endl; - if (! f. good ()) - throw runtime_error (tmpDir + " is full, make space there or use environment variable TMPDIR to change location for temporary files"); - } - removeFile (testFName); - } - - tmpCreated = true; - } + tmp = makeTempDir (); stderr. quiet = getQuiet (); @@ -4154,7 +4241,7 @@ string ShellApplication::getHelp (bool screen) const void ShellApplication::body () const { - if (useTmp) + if (! tmp. empty ()) LOG (tmp); shellBody (); } @@ -4199,6 +4286,7 @@ string ShellApplication::exec2str (const string &cmd, const string &tmpName, const string &logFName) const { + ASSERT (! tmp. empty ()); ASSERT (! contains (tmpName, ' ')); const string out (tmp + "/" + tmpName); exec (cmd + " > " + out, logFName); @@ -4213,6 +4301,7 @@ string ShellApplication::exec2str (const string &cmd, string ShellApplication::uncompress (const string "edFName, const string &suffix) const { + ASSERT (! tmp. empty ()); const string res (shellQuote (tmp + "/" + suffix)); QC_ASSERT (quotedFName != res); const string s (unQuote (quotedFName)); @@ -4229,6 +4318,8 @@ string ShellApplication::uncompress (const string "edFName, string ShellApplication::getBlastThreadsParam (const string &blast, size_t threads_max_max) const { + ASSERT (! tmp. empty ()); + const size_t t = min (threads_max, threads_max_max); if (t <= 1) // One thread is main return noString; diff --git a/common.hpp b/common.hpp index 3eb5da5..d282cd0 100644 --- a/common.hpp +++ b/common.hpp @@ -102,7 +102,6 @@ bool initCommon (); // Invoked automaticallly - // Numeric types typedef unsigned char uchar; @@ -149,10 +148,30 @@ constexpr const char* error_caption ("*** ERROR ***"); void errorExitStr (const string &msg); // For debugger: should not be inline + // Invokes: beep() [[noreturn]] void throwf (const string &s); // For debugger: should not be inline - // Invokes: logic_error + // Invokes: throw logic_error + + +#if 0 +struct InputError : runtime_error // ?? +{ + static bool on; + // Init: false + + InputError (const string &what_arg) + : runtime_error (what_arg) + { on = true; } +}; +#endif + + + +void sleepNano (long nanoSec); + + void beep (); // Requires: !isRedirected() @@ -311,7 +330,7 @@ template } template - bool contains (const array &arr, const T item) + inline bool contains (const array &arr, const T item) { return indexOf (arr, item) != no_index; } template @@ -471,10 +490,17 @@ extern hash str_hash; extern hash size_hash; -constexpr size_t hash_class_max = 1000; // PAR +// PAR +constexpr size_t small_hash_class_max = 1000; +constexpr size_t large_hash_class_max = small_hash_class_max * 100; -inline size_t str2hash_class (const string &s) - { return str_hash (s) % hash_class_max; } +inline size_t str2hash_class (const string &s, + bool large_hash) + { return str_hash (s) % (large_hash + ? large_hash_class_max + : small_hash_class_max + ); + } @@ -574,6 +600,7 @@ inline bool isDelimiter (char c) inline bool isSpace (char c) { return c > '\0' && c <= ' ' && isspace (c); } +string to_url (const string &s); // char* @@ -702,6 +729,7 @@ bool goodName (const string &name); bool isIdentifier (const string& name, bool dashInName); + // Return: true. !empty(), !dashInName => (c \in mame => isLetter(c)) bool isNatural (const string& name, bool leadingZeroAllowed); @@ -1065,6 +1093,10 @@ template } void swap () { std::swap (P::first, P::second); } + bool empty () const + { return P::first == T () + && P::second == T (); + } }; @@ -1338,6 +1370,9 @@ void copyText (const string &inFName, void removeDirectory (const string &dirName); // With its contents + + string makeTempDir (); + // And test void concatTextDir (const string &inDirName, const string &outFName); @@ -1697,6 +1732,29 @@ void exec (const string &cmd, +struct Lock +{ + const bool active; +private: + std::mutex& mtx; +public: + + + explicit Lock (std::mutex &mtx_arg, + bool active_arg = true) + : active (active_arg) + , mtx (mtx_arg) + { if (active) + mtx_arg. lock (); + } + ~Lock () + { if (active) + mtx. unlock (); + } +}; + + + // Threads extern size_t threads_max; @@ -2071,6 +2129,18 @@ struct VirtNamed : Root }; +template + inline bool containsNamed (const map &m, + const Value* t) + { if (! t) + return false; + const auto& it = m. find (t->getName ()); + if (it == m. end ()) + return false; + return it->second == t; + } + + struct Named : VirtNamed { @@ -2208,6 +2278,22 @@ template n++; return n; } + bool overlapStart (size_t start, + const vector &other) const + { const size_t end = min (P::size (), start + other. size ()); + for (size_t i = start, j = 0; i < end; i++, j++) + if ((*this) [i] != other [j]) + return false; + return true; + } + size_t overlapStart_min (const vector &other, + size_t start_min) const + { for (size_t start = start_min; start < P::size (); start++) + if (overlapStart (start, other)) + return start; + return P::size (); + } + // a = v.overlapStart_min(v,1), a < v.size(), v.size() % a == 0 => a is the period of v void checkSorted () const { if (! searchSorted) throw runtime_error ("Vector is not sorted for search"); @@ -2706,6 +2792,11 @@ template { P::operator<< (other); return *this; } + template */> + VectorPtr& operator<< (VectorPtr &&other) + { P::operator<< (std::move (other)); + return *this; + } void deleteData () { for (const T* t : *this) delete t; @@ -2863,97 +2954,6 @@ template -struct StringVector : Vector -{ -private: - typedef Vector P; -public: - - - StringVector () = default; - explicit StringVector (initializer_list init) - : P (init) - {} - StringVector (const string &fName, - size_t reserve_size, - bool trimP); - StringVector (const string &s, - char sep, - bool trimP); - explicit StringVector (size_t n) - : P (n, noString) - {} - - - string toString (const string& sep) const; - string toString () const - { return toString (noString); } - bool same (const StringVector &vec, - const Vector &indexes) const; - void to_xml (Xml::File &f, - const string &tag); - // XML: at(0) at(1) ... - // Invokes: sort(), clear() - - - struct Hasher - { - size_t operator () (const StringVector& vec) const - { size_t ret = 0; - for (const string& s : vec) - ret ^= hash() (s); - return ret; - } - }; -}; - - - -template - StringVector set2vec (const set &s) - { StringVector vec; vec. reserve (s. size ()); - for (const Key* key : s) - { assert (key); - vec << key->getName (); - } - return vec; - } - - - -struct Csv -// Line of Excel .csv-file -{ -private: - const string &s; - size_t pos {0}; -public: - - - explicit Csv (const string &s_arg) - : s (s_arg) - {} - - - bool goodPos () const - { return pos < s. size (); } - string getWord (); - // Return: Next word - // Requires: goodPos() -private: - void findChar (char c) - { while (goodPos () && s [pos] != c) - pos++; - } -}; - - - -StringVector csvLine2vec (const string &line); - // Invokes: Csv - - - template struct Set : set { @@ -3188,6 +3188,101 @@ template +struct StringVector : Vector +{ +private: + typedef Vector P; +public: + + + StringVector () = default; + explicit StringVector (initializer_list init) + : P (init) + {} + explicit StringVector (const Set &from) + { insertAll (*this, from); + searchSorted = true; + } + StringVector (const string &fName, + size_t reserve_size, + bool trimP); + StringVector (const string &s, + char sep, + bool trimP); + explicit StringVector (size_t n) + : P (n, noString) + {} + + + string toString (const string& sep) const; + string toString () const + { return toString (noString); } + bool same (const StringVector &vec, + const Vector &indexes) const; + void to_xml (Xml::File &f, + const string &tag); + // XML: at(0) at(1) ... + // Invokes: sort(), clear() + + + struct Hasher + { + size_t operator () (const StringVector& vec) const + { size_t ret = 0; + for (const string& s : vec) + ret ^= hash() (s); + return ret; + } + }; +}; + + + +template + StringVector set2vec (const set &s) + { StringVector vec; vec. reserve (s. size ()); + for (const Key* key : s) + { assert (key); + vec << key->getName (); + } + return vec; + } + + + +struct Csv +// Line of Excel .csv-file +{ +private: + const string &s; + size_t pos {0}; +public: + + + explicit Csv (const string &s_arg) + : s (s_arg) + {} + + + bool goodPos () const + { return pos < s. size (); } + string getWord (); + // Return: Next word + // Requires: goodPos() +private: + void findChar (char c) + { while (goodPos () && s [pos] != c) + pos++; + } +}; + + + +StringVector csvLine2vec (const string &line); + // Invokes: Csv + + + template struct RandomSet // Set stored in a vector for a random access @@ -3260,7 +3355,6 @@ template public: - explicit Heap (const CompareInt &comp_arg, const SetHeapIndex &setHeapIndex_arg = nullptr, size_t toReserve = 0) @@ -3372,9 +3466,9 @@ template // Test static void testStr () - { StringVector vec {"Moscow", "San Diego", "Los Angeles", "Paris"}; - Heap heap (strComp); - for (string& s : vec) + { const StringVector vec {"Moscow", "San Diego", "Los Angeles", "Paris"}; + Heap heap (strComp); + for (const string& s : vec) heap << & s; while (! heap. empty ()) { cout << * heap. getMaximum () << endl; @@ -3386,8 +3480,8 @@ template const void* s2) { const string& s1_ = * static_cast (s1); const string& s2_ = * static_cast (s2); - if (s1_ > s2_) return -1; if (s1_ < s2_) return 1; + if (s1_ > s2_) return -1; return 0; } }; @@ -3453,8 +3547,14 @@ struct Chronometer : Nocopy bool on () const { return enabled && threads_max == 1; } + bool started () const + { return startTime != noclock; } void start (); + // Requires: !started() void stop (); + // Requires: started() + void cancel () + { startTime = noclock; } void print (ostream &os) const; }; @@ -3594,15 +3694,7 @@ struct TextPos { return lineNum == other. lineNum && charNum == other. charNum; } - string str () const - { return "line " + to_string (lineNum + 1) + ", " + - (eol () - ? "end of line" - : last () - ? "last position" - : "pos. " + to_string (charNum + 1) - ); - } + string str () const; void inc (bool eol_arg); @@ -3616,12 +3708,12 @@ struct TextPos struct Error : runtime_error - { Error (const TextPos tp, - const string &what, - bool expected = true) - : runtime_error ((tp. str () + ": " + what + ifS (expected, " is expected")). c_str ()) - {} - }; + { Error (const TextPos tp, + const string &what, + bool expected = true) + : runtime_error ((tp. str () + what + ifS (expected, " is expected")). c_str ()) + {} + }; }; @@ -3678,8 +3770,6 @@ struct LineInput : Input bool nextLine (); // Output: line - // Update: tp - // Invokes: trimTrailing() bool expectPrefix (const string &prefix, bool eofAllowed) { if (nextLine () && trimPrefix (line, prefix)) @@ -3764,9 +3854,9 @@ struct Token : Root enum Type { eName , eDelimiter , eText - , eInteger + , eInteger // 10-based or 16-based: 0xNNNN... , eDouble - , eDateTime + , eDateTime // Example: 2018-08-13T16:12:54.487 }; // Valid if !empty() Type type {eDelimiter}; @@ -3840,6 +3930,9 @@ struct Token : Root { *this = Token (); } + [[noreturn]] void error (const string &what, + bool expected = true) const + { throw TextPos::Error (tp, what, expected); } static string type2str (Type type) { switch (type) { case eName: return "name"; @@ -3903,6 +3996,7 @@ struct TokenInput : Root const bool consecutiveQuotesInText; // Two quotes encode one quote Token last; + TextPos tp; public: @@ -3928,13 +4022,9 @@ struct TokenInput : Root {} - [[noreturn]] void error (const Token &wrongToken, - const string &what, - bool expected = true) const - { throw TextPos::Error (wrongToken. tp, what, expected); } [[noreturn]] void error (const string &what, bool expected = true) const - { ci. error (what, expected); } + { throw TextPos::Error (tp, what, expected); } Token get (); // Return: empty() <=> EOF @@ -3952,17 +4042,17 @@ struct TokenInput : Root void get (const string &expected) { const Token t (get ()); if (! t. isNameText (expected)) - error (t, Token::type2str (Token::eName) + " " + strQuote (expected)); + t. error (Token::type2str (Token::eName) + " " + strQuote (expected)); } void get (int expected) { const Token t (get ()); if (! t. isInteger (expected)) - error (t, Token::type2str (Token::eInteger) + " " + to_string (expected)); + t. error (Token::type2str (Token::eInteger) + " " + to_string (expected)); } void get (double expected) { const Token t (get ()); if (! t. isDouble (expected)) - error (t, Token::type2str (Token::eDouble) + " " + toString (expected)); + t. error (Token::type2str (Token::eDouble) + " " + toString (expected)); } void get (char expected) { const Token t (get ()); @@ -3970,7 +4060,9 @@ struct TokenInput : Root error (Token::type2str (Token::eDelimiter) + " " + strQuote (toString (expected), '\'')); } void setLast (Token &&t) - { last = std::move (t); } + { last = std::move (t); + tp = last. tp; + } bool getNext (char expected) { Token token (get ()); if (! token. isDelimiter (expected)) @@ -3984,6 +4076,29 @@ struct TokenInput : Root +struct BraceInput : TokenInput +{ + static constexpr char commentC {'#'}; + static constexpr const char* commentS {"comment"}; + static constexpr char endChar {';'}; + + + explicit BraceInput (const string &fName) + : TokenInput (fName, commentC) + {} + explicit BraceInput (istream &is_arg) + : TokenInput (is_arg, commentC) + {} + + + static string endS () + { return string (1, endChar); } + void skipComment (); +}; + + + + ///////////////////////////////////// Json ////////////////////////////////////////// extern unique_ptr jRoot; @@ -4639,6 +4754,7 @@ struct Application : Singleton, Root int run (int argc, const char* argv []); // Invokes: body() + // if runtime_error then exit(1) else errorExit() private: virtual void body () const = 0; // Invokes: initEnvironment() @@ -4656,9 +4772,7 @@ struct ShellApplication : Application string tmp; // Temporary directory: ($TMPDIR or "/tmp") + "/" + programName + "XXXXXX" // If log is used then tmp is printed in the log file and the temporary files are not deleted -private: - bool tmpCreated {false}; -protected: + // !empty() => useTmp string execDir; // Ends with '/' // Physically real directory of the software @@ -4700,7 +4814,7 @@ struct ShellApplication : Application string exec2str (const string &cmd, const string &tmpName, const string &logFName = noString) const; - // Return: `cmd > /tmpName && cat /tmpName` + // Return: $( cmd > /tmpName && cat /tmpName ) // Requires: cmd produces one line string uncompress (const string "edFName, const string &suffix) const; diff --git a/common.inc b/common.inc index b72d4b4..a662b9b 100644 --- a/common.inc +++ b/common.inc @@ -73,7 +73,7 @@ #define ERROR_MSG(msg) \ { if (! std::uncaught_exceptions ()) \ - throw runtime_error (std::string ("\"" __FILE__ "\", line ") + to_string (__LINE__) + ", in " + (FUNC) + (msg)); \ + throwf (std::string ("\"" __FILE__ "\", line ") + to_string (__LINE__) + ", in " + (FUNC) + (msg)); \ exit (1); \ } #define ERROR ERROR_MSG ("ERROR") diff --git a/stxtyper.cpp b/stxtyper.cpp index 9650c5d..a8c55eb 100644 --- a/stxtyper.cpp +++ b/stxtyper.cpp @@ -32,6 +32,7 @@ * Dependencies: NCBI BLAST, gunzip (optional) * * Release changes: +* 1.0.26 10/22/2024 PD-5085 Change column "Element length" to "Target length" * 1.0.25 08/16/2024 PD-5085 AMRFinderPlus column names to match MicroBIGG-E * 1.0.24 08/05/2024 PD-5076 "na" -> "NA" * 1.0.23 07/29/2024 PD-5064 AMBIGUOUS operon type @@ -649,7 +650,7 @@ struct Operon return al1->stxType; if (al1->stxClass != al2->stxClass) { - //return al1->stxClass + "/" + al2->stxClass; ?? // order alphabetically + //return al1->stxClass + fusion_infix + al2->stxClass; ?? // order alphabetically if (al1->stxSuperClass == al2->stxSuperClass) return al1->stxSuperClass; return noString; diff --git a/test/amrfinder_integration.expected b/test/amrfinder_integration.expected index b447c3b..4e96184 100644 --- a/test/amrfinder_integration.expected +++ b/test/amrfinder_integration.expected @@ -1,4 +1,4 @@ -#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Element length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description +#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA diff --git a/test/amrfinder_integration2.expected b/test/amrfinder_integration2.expected index e16e1b5..9988894 100644 --- a/test/amrfinder_integration2.expected +++ b/test/amrfinder_integration2.expected @@ -1,4 +1,4 @@ -#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Element length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node +#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c, stxB2a NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a, stxA2c NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a, stxB1a diff --git a/tsv.hpp b/tsv.hpp index 26af24f..261a024 100644 --- a/tsv.hpp +++ b/tsv.hpp @@ -167,6 +167,7 @@ struct TextTable : Named // size() = number of columns Vector rows; // StringVector::size() = header.size() + // Values are trim()'ed typedef size_t ColNum; // no_index <=> no column typedef size_t RowNum; diff --git a/version.txt b/version.txt index 4a4127c..8955a01 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.25 +1.0.26 From ddc66abc95a0d74c4fbd0729dde1a7f772686b8d Mon Sep 17 00:00:00 2001 From: Vyacheslav Brover Date: Wed, 23 Oct 2024 10:39:57 -0400 Subject: [PATCH 2/3] PD-5155 "Hierarchy node" with mixed types is :: --- stxtyper.cpp | 5 +++-- version.txt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/stxtyper.cpp b/stxtyper.cpp index a8c55eb..7e365fc 100644 --- a/stxtyper.cpp +++ b/stxtyper.cpp @@ -32,6 +32,7 @@ * Dependencies: NCBI BLAST, gunzip (optional) * * Release changes: +* 1.0.27 10/23/2024 PD-5155 "Hierarchy node" with mixed types is :: * 1.0.26 10/22/2024 PD-5085 Change column "Element length" to "Target length" * 1.0.25 08/16/2024 PD-5085 AMRFinderPlus column names to match MicroBIGG-E * 1.0.24 08/05/2024 PD-5076 "na" -> "NA" @@ -581,7 +582,7 @@ struct Operon //const double refCoverage = double (al1->getAbsCoverage () + al2->getAbsCoverage ()) / double (refLen) * 100.0; const size_t alignmentLen = al1->length + al2->length; const string refAccessions (al1->refAccession + ", " + al2->refAccession); - const string fam (al1->getGenesymbol () + ", " + al2->getGenesymbol ()); + const string fam (al1->getGenesymbol () + fusion_infix + al2->getGenesymbol ()); td << na // 1 "Protein identifier" << targetName // 2 "Contig id" << start // 3 "Start" @@ -650,7 +651,7 @@ struct Operon return al1->stxType; if (al1->stxClass != al2->stxClass) { - //return al1->stxClass + fusion_infix + al2->stxClass; ?? // order alphabetically + //return al1->stxClass + fusion_infix + al2->stxClass; // order alphabetically if (al1->stxSuperClass == al2->stxSuperClass) return al1->stxSuperClass; return noString; diff --git a/version.txt b/version.txt index 8955a01..adb7b04 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.26 +1.0.27 From de398b21eb27cb80685e3dde37f5498c33214078 Mon Sep 17 00:00:00 2001 From: Arjun Prasad Date: Thu, 24 Oct 2024 16:55:45 -0400 Subject: [PATCH 3/3] Update test data for version 1.0.27 --- test/amrfinder_integration2.expected | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/amrfinder_integration2.expected b/test/amrfinder_integration2.expected index 9988894..320ccc2 100644 --- a/test/amrfinder_integration2.expected +++ b/test/amrfinder_integration2.expected @@ -1,8 +1,8 @@ #Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node -NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c, stxB2a -NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a, stxA2c -NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a, stxB1a -NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 99.15 355 AAG01033.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c, stxB2c -NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 99.76 410 AAA19623.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c, stxB2c +NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c::stxB2a +NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a::stxA2c +NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a::stxB1a +NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 99.15 355 AAG01033.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c +NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 99.76 410 AAA19623.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c NA stx2_stop 694 1653 + stx2_operon stx2 operon with internal stop plus VIRULENCE STX_TYPE STX2 STX2 INTERNAL_STOP 960 91.25 320 AUM09788.1 Shiga toxin stx2h subunit A NA NA stxA2h -NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 100.00 410 AAA16363.1, AAS07596.1 Shiga toxin stx2c NA NA stxB2c, stxA2a +NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 100.00 410 AAA16363.1, AAS07596.1 Shiga toxin stx2c NA NA stxB2c::stxA2a