diff --git a/amrfinder_columns.hpp b/amrfinder_columns.hpp
index 5501b6f..cf25a92 100644
--- a/amrfinder_columns.hpp
+++ b/amrfinder_columns.hpp
@@ -1,4 +1,38 @@
-// AMRFinderPlus column names
+// amrfinder_columns.hpp
+
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Author: Vyacheslav Brover
+*
+* File Description:
+* AMRFinderPlus column names
+*
+*/
+
+
+
// PD-5085
constexpr const char* prot_colName = "Protein id"; // PD-2534
@@ -18,7 +52,7 @@ constexpr const char* class_colName = "Class";
constexpr const char* subclass_colName = "Subclass";
//
constexpr const char* method_colName = "Method";
-constexpr const char* targetLen_colName = "Element length"; // was: "Target length" ??
+constexpr const char* targetLen_colName = "Target length"; // was: "Element length" (temporarily)
constexpr const char* refLen_colName = "Reference sequence length";
constexpr const char* refCov_colName = "% Coverage of reference";
constexpr const char* refIdent_colName = "% Identity to reference";
@@ -28,3 +62,8 @@ constexpr const char* closestRefName_colName = "Closest reference name";
constexpr const char* hmmAccession_colName = "HMM accession";
constexpr const char* hmmDescr_colName = "HMM description";
constexpr const char* hierarchyNode_colName = "Hierarchy node";
+
+
+// PD-5155
+constexpr const char* fusion_infix = "::"; // was: "/"
+
\ No newline at end of file
diff --git a/common.cpp b/common.cpp
index b53b39f..f77af4c 100644
--- a/common.cpp
+++ b/common.cpp
@@ -297,7 +297,7 @@ string getStack ()
char** strings = backtrace_symbols (buffer, nptrs);
if (strings /*&& ! which ("addr2line"). empty ()*/)
{
- FOR_START (int, i, 1, nptrs)
+ FOR_REV_END (int, i, 1, nptrs)
s += string (strings [i]) + "\n";
s += "Use: addr2line -f -C -e " + programArgs [0] + " -a
";
//free (strings);
@@ -320,6 +320,10 @@ string getStack ()
+//bool InputError::on = false;
+
+
+
//
@@ -340,6 +344,15 @@ bool isRedirected (const ostream &os)
+void sleepNano (long nanoSec)  // Sleeps for nanoSec nanoseconds; nanoSec may be 1 second or more
+{
+  timespec t = {nanoSec / 1000000000L, nanoSec % 1000000000L};  // nanosleep() requires 0 <= tv_nsec < 1e9
+  while (nanosleep (& t, & t))  // Interrupted by a signal (EINTR) => t = remaining time: resume
+    if (errno != EINTR) throw logic_error ("nanosleep: " + string (strerror (errno)));  // E.g., nanoSec < 0
+}
+
+
+
void beep ()
{
if (getEnv ("SHLVL") != "1")
@@ -364,7 +377,7 @@ void Chronometer::start ()
{
if (! on ())
return;
- if (startTime != noclock)
+ if (started ())
throw runtime_error (FUNC "Chronometer \"" + name + "\" is not stopped");
startTime = clock ();
}
@@ -375,7 +388,7 @@ void Chronometer::stop ()
{
if (! on ())
return;
- if (startTime == noclock)
+ if (! started ())
throw runtime_error (FUNC "Chronometer \"" + name + "\" is not started");
time += clock () - startTime;
startTime = noclock;
@@ -533,6 +546,23 @@ string nonPrintable2str (char c)
+string to_url (const string &s)  // Return: s where each character other than a letter, a digit or '_' is replaced by "%" + uchar2hex() of it
+{
+ string url;
+ for (const char c : s)
+ if ( isLetter (c)
+ || isDigit (c)
+ || c == '_'
+ )
+ url += c;  // Copied unchanged
+ else
+ url += "%" + uchar2hex ((uchar) c);  // NOTE(review): presumably uchar2hex() yields 2 hex digits - confirm
+
+ return url;
+}
+
+
+
bool isRight (const string &s,
const string &right)
{
@@ -1241,7 +1271,7 @@ void removeDirectory (const string &dirName)
{
case Filetype::link:
if (unlink (name. c_str ()))
- throw runtime_error ("cannot unlink " + strQuote (name));
+ throw logic_error ("cannot unlink " + strQuote (name));
break;
case Filetype::dir:
removeDirectory (name);
@@ -1250,17 +1280,45 @@ void removeDirectory (const string &dirName)
removeFile (name);
break;
default:
- throw runtime_error ("Cannot remove directory item " + strQuote (name) + " of type " + strQuote (filetype2name (t)));
+ throw logic_error ("Cannot remove directory item " + strQuote (name) + " of type " + strQuote (filetype2name (t)));
}
}
if (rmdir (dirName. c_str ()))
- throw runtime_error ("Cannot remove directory " + strQuote (dirName));
+ throw logic_error ("Cannot remove directory " + strQuote (dirName));
+}
+
+
+
+string makeTempDir ()
+{
+  string tmpDir (getEnv ("TMPDIR"));
+  if (tmpDir. empty ())
+    tmpDir = "/tmp";
+
+  string tmp = tmpDir + "/" + programName + ".XXXXXX";
+  // mkdtemp() overwrites the "XXXXXX" suffix in place: pass the writable buffer, not c_str()
+  if (! mkdtemp (& tmp [0]))
+    throw runtime_error ("Error creating a temporary directory in " + tmpDir);
+
+  // Test that a file can be written into the new directory
+  {
+    const string testFName (tmp + "/test");
+    {
+      ofstream f (testFName);
+      f << "abc" << endl;
+      if (! f. good ())
+        throw runtime_error (tmpDir + " is full, make space there or use environment variable TMPDIR to change location for temporary files");
+    }
+    removeFile (testFName);
+  }
+
+  return tmp;
}
void concatTextDir (const string &inDirName,
- const string &outFName)
+ const string &outFName)
{
RawDirItemGenerator dig (0, inDirName, false);
OFStream outF (outFName);
@@ -2058,6 +2116,22 @@ void Progress::report () const
// TextPos
+string TextPos::str () const  // Return: "line <lineNum + 1>, <position>: ", or noString if lineNum == -1
+{
+ if (lineNum == -1)
+ return noString;
+ return "line " + to_string (lineNum + 1) + ", " +
+ (eol ()
+ ? "end of line"
+ : last ()
+ ? "last position"
+ : "pos. " + to_string (charNum + 1)
+ ) +
+ ": ";  // The separator is part of the result: a message can be appended directly
+}
+
+
+
void TextPos::inc (bool eol_arg)
{
if (eol ())
@@ -2315,7 +2389,7 @@ void Token::readInput (CharInput &in,
qc ();
if (verbose ())
- cout << type2str (type) << ' ' << *this << ' ' << tp. str () << endl;
+ cout << tp. str () << type2str (type) << ' ' << *this << endl;
}
@@ -2470,13 +2544,17 @@ Token TokenInput::get ()
const Token last_ (last);
last = Token ();
if (! last_. empty ())
+ {
+ tp = last_. tp;
return last_;
+ }
for (;;)
{
Token t (ci, dashInName, consecutiveQuotesInText);
if (t. empty ())
break;
+ tp = t. tp;
if (! t. isDelimiter (commentStart))
return t;
ci. getLine ();
@@ -2728,6 +2806,31 @@ char TokenInput::getNextChar (bool unget)
+// BraceInput
+
+void BraceInput::skipComment ()
+// Skips a comment in braces, nested braces included
+// Requires: the next token is '{'
+// Throws: TextPos::Error if the input ends before the matching '}'
+{
+  get ('{');
+  size_t depth = 1;  // Number of '{' not closed yet
+  while (depth)
+  {
+    const Token t (get ());
+    if (t. empty ())  // EOF: an empty token never closes the comment, so the loop would not end
+      error (Token::type2str (Token::eDelimiter) + " '}'");
+    if (t. isDelimiter ('{'))
+      depth++;
+    else if (t. isDelimiter ('}'))
+      depth--;
+  }
+//get (endChar);
+}
+
+
+
+
// IFStream
IFStream::IFStream (const string &pathName)
@@ -3235,8 +3338,8 @@ struct RawDirItemGenerator::Imp
RawDirItemGenerator::RawDirItemGenerator (size_t progress_displayPeriod,
- const string& dirName_arg,
- bool large_arg)
+ const string& dirName_arg,
+ bool large_arg)
: ItemGenerator (0, progress_displayPeriod)
, dirName (dirName_arg)
, imp (new Imp (dirName_arg))
@@ -4030,9 +4133,24 @@ int Application::run (int argc,
jRoot. reset ();
}
}
+ catch (const std::range_error &e) { errorExitStr (string ("Range error: ") + e. what ()); }
+ catch (const std::overflow_error &e) { errorExitStr (string ("Overflow error: ") + e. what ()); }
+ catch (const std::underflow_error &e) { errorExitStr (string ("Underflow error: ") + e. what ()); }
+ catch (const std::system_error &e) { errorExitStr (string ("System error: ") + e. what ()); }
+ catch (const std::runtime_error &e)
+ {
+ beep ();
+ ostream* os = logPtr ? logPtr : & cerr;
+ {
+ const OColor oc (*os, Color::red, true, true);
+ *os << error_caption;
+ }
+ *os << endl << e. what () << endl;
+ exit (1);
+ }
catch (const std::exception &e)
{
- errorExit ((ifS (errno, strerror (errno) + string ("\n")) + e. what ()). c_str ());
+ errorExitStr (ifS (errno, strerror (errno) + string ("\n")) + e. what ());
}
@@ -4046,7 +4164,7 @@ int Application::run (int argc,
ShellApplication::~ShellApplication ()
{
- if (tmpCreated && ! logPtr)
+ if (! tmp. empty () && ! logPtr)
removeDirectory (tmp);
if (startTime)
@@ -4064,16 +4182,6 @@ void ShellApplication::initEnvironment ()
ASSERT (tmp. empty ());
ASSERT (! programArgs. empty ());
- // tmp
- if (useTmp)
- {
- string s (getEnv ("TMPDIR"));
- if (s. empty ())
- tmp = "/tmp";
- else
- tmp = std::move (s);
- }
-
// execDir, programName
execDir = getProgramDirName ();
if (execDir. empty ())
@@ -4097,30 +4205,9 @@ void ShellApplication::initEnvironment ()
void ShellApplication::initVar ()
{
- ASSERT (! tmpCreated);
-
+ ASSERT (tmp. empty ());
if (useTmp)
- {
- const string tmpDir (tmp);
- tmp += "/" + programName + ".XXXXXX";
- if (! mkdtemp (var_cast (tmp. c_str ())))
- throw runtime_error ("Error creating a temporary directory in " + tmpDir);
- if (tmp. empty ())
- throw runtime_error ("Cannot create a temporary directory in " + tmpDir);
-
- {
- const string testFName (tmp + "/test");
- {
- ofstream f (testFName);
- f << "abc" << endl;
- if (! f. good ())
- throw runtime_error (tmpDir + " is full, make space there or use environment variable TMPDIR to change location for temporary files");
- }
- removeFile (testFName);
- }
-
- tmpCreated = true;
- }
+ tmp = makeTempDir ();
stderr. quiet = getQuiet ();
@@ -4154,7 +4241,7 @@ string ShellApplication::getHelp (bool screen) const
void ShellApplication::body () const
{
- if (useTmp)
+ if (! tmp. empty ())
LOG (tmp);
shellBody ();
}
@@ -4199,6 +4286,7 @@ string ShellApplication::exec2str (const string &cmd,
const string &tmpName,
const string &logFName) const
{
+ ASSERT (! tmp. empty ());
ASSERT (! contains (tmpName, ' '));
const string out (tmp + "/" + tmpName);
exec (cmd + " > " + out, logFName);
@@ -4213,6 +4301,7 @@ string ShellApplication::exec2str (const string &cmd,
string ShellApplication::uncompress (const string "edFName,
const string &suffix) const
{
+ ASSERT (! tmp. empty ());
const string res (shellQuote (tmp + "/" + suffix));
QC_ASSERT (quotedFName != res);
const string s (unQuote (quotedFName));
@@ -4229,6 +4318,8 @@ string ShellApplication::uncompress (const string "edFName,
string ShellApplication::getBlastThreadsParam (const string &blast,
size_t threads_max_max) const
{
+ ASSERT (! tmp. empty ());
+
const size_t t = min (threads_max, threads_max_max);
if (t <= 1) // One thread is main
return noString;
diff --git a/common.hpp b/common.hpp
index 3eb5da5..d282cd0 100644
--- a/common.hpp
+++ b/common.hpp
@@ -102,7 +102,6 @@ bool initCommon ();
// Invoked automaticallly
-
// Numeric types
typedef unsigned char uchar;
@@ -149,10 +148,30 @@ constexpr const char* error_caption ("*** ERROR ***");
void errorExitStr (const string &msg);
// For debugger: should not be inline
+ // Invokes: beep()
[[noreturn]] void throwf (const string &s);
// For debugger: should not be inline
- // Invokes: logic_error
+ // Invokes: throw logic_error
+
+
+#if 0
+struct InputError : runtime_error // ??
+{
+ static bool on;
+ // Init: false
+
+ InputError (const string &what_arg)
+ : runtime_error (what_arg)
+ { on = true; }
+};
+#endif
+
+
+
+void sleepNano (long nanoSec);
+
+
void beep ();
// Requires: !isRedirected()
@@ -311,7 +330,7 @@ template
}
template
- bool contains (const array &arr, const T item)
+ inline bool contains (const array &arr, const T item)
{ return indexOf (arr, item) != no_index; }
template
@@ -471,10 +490,17 @@ extern hash str_hash;
extern hash size_hash;
-constexpr size_t hash_class_max = 1000; // PAR
+// PAR
+constexpr size_t small_hash_class_max = 1000;
+constexpr size_t large_hash_class_max = small_hash_class_max * 100;
-inline size_t str2hash_class (const string &s)
- { return str_hash (s) % hash_class_max; }
+inline size_t str2hash_class (const string &s,
+ bool large_hash)
+ { return str_hash (s) % (large_hash  // Return: < large_hash_class_max if large_hash, else < small_hash_class_max
+ ? large_hash_class_max
+ : small_hash_class_max
+ );
+ }
@@ -574,6 +600,7 @@ inline bool isDelimiter (char c)
inline bool isSpace (char c)
{ return c > '\0' && c <= ' ' && isspace (c); }
+string to_url (const string &s);
// char*
@@ -702,6 +729,7 @@ bool goodName (const string &name);
bool isIdentifier (const string& name,
bool dashInName);
+ // Return: true. !empty(), !dashInName => (c \in name => isLetter(c))
bool isNatural (const string& name,
bool leadingZeroAllowed);
@@ -1065,6 +1093,10 @@ template
}
void swap ()
{ std::swap (P::first, P::second); }
+ bool empty () const  // Return: both elements are equal to a value-initialized T
+ { return P::first == T ()
+ && P::second == T ();
+ }
};
@@ -1338,6 +1370,9 @@ void copyText (const string &inFName,
void removeDirectory (const string &dirName);
// With its contents
+
+ string makeTempDir ();
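+ // Return: a new directory <$TMPDIR or "/tmp">/<programName>.XXXXXX, tested to be writable by creating and removing a file in it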
void concatTextDir (const string &inDirName,
const string &outFName);
@@ -1697,6 +1732,29 @@ void exec (const string &cmd,
+struct Lock
+// Scoped lock of a mutex: locks in the constructor and unlocks in the destructor, only if active
+{
+  const bool active;
+private:
+  std::mutex& mtx;
+public:
+
+  explicit Lock (std::mutex &mtx_arg,
+                 bool active_arg = true)
+    : active (active_arg)
+    , mtx (mtx_arg)
+    { if (active)
+        mtx. lock ();
+    }
+  Lock (const Lock&) = delete;  // A copy would unlock the mutex a second time
+  ~Lock ()
+    { if (active)
+        mtx. unlock ();
+    }
+};
+
+
+
// Threads
extern size_t threads_max;
@@ -2071,6 +2129,18 @@ struct VirtNamed : Root
};
+template
+ inline bool containsNamed (const map &m,
+ const Value* t)  // Return: t is not null and m maps t->getName() to the pointer t itself
+ { if (! t)
+ return false;
+ const auto& it = m. find (t->getName ());
+ if (it == m. end ())
+ return false;
+ return it->second == t;  // Pointer identity: another object with the same name does not count
+ }
+
+
struct Named : VirtNamed
{
@@ -2208,6 +2278,22 @@ template
n++;
return n;
}
+ bool overlapStart (size_t start,
+ const vector &other) const  // Return: other matches *this from position start up to the end of the shorter one; true if start >= size()
+ { const size_t end = min (P::size (), start + other. size ());
+ for (size_t i = start, j = 0; i < end; i++, j++)
+ if ((*this) [i] != other [j])
+ return false;
+ return true;
+ }
+ size_t overlapStart_min (const vector &other,
+ size_t start_min) const  // Return: the smallest start >= start_min with overlapStart (start, other), or size() if there is none
+ { for (size_t start = start_min; start < P::size (); start++)
+ if (overlapStart (start, other))
+ return start;
+ return P::size ();
+ }
+ // a = v.overlapStart_min(v,1), a < v.size(), v.size() % a == 0 => a is the period of v
void checkSorted () const
{ if (! searchSorted)
throw runtime_error ("Vector is not sorted for search");
@@ -2706,6 +2792,11 @@ template
{ P::operator<< (other);
return *this;
}
+ template */>
+ VectorPtr& operator<< (VectorPtr &&other)
+ { P::operator<< (std::move (other));
+ return *this;
+ }
void deleteData ()
{ for (const T* t : *this)
delete t;
@@ -2863,97 +2954,6 @@ template
-struct StringVector : Vector
-{
-private:
- typedef Vector P;
-public:
-
-
- StringVector () = default;
- explicit StringVector (initializer_list init)
- : P (init)
- {}
- StringVector (const string &fName,
- size_t reserve_size,
- bool trimP);
- StringVector (const string &s,
- char sep,
- bool trimP);
- explicit StringVector (size_t n)
- : P (n, noString)
- {}
-
-
- string toString (const string& sep) const;
- string toString () const
- { return toString (noString); }
- bool same (const StringVector &vec,
- const Vector &indexes) const;
- void to_xml (Xml::File &f,
- const string &tag);
- // XML: - at(0)
- at(1)
...
- // Invokes: sort(), clear()
-
-
- struct Hasher
- {
- size_t operator () (const StringVector& vec) const
- { size_t ret = 0;
- for (const string& s : vec)
- ret ^= hash() (s);
- return ret;
- }
- };
-};
-
-
-
-template
- StringVector set2vec (const set &s)
- { StringVector vec; vec. reserve (s. size ());
- for (const Key* key : s)
- { assert (key);
- vec << key->getName ();
- }
- return vec;
- }
-
-
-
-struct Csv
-// Line of Excel .csv-file
-{
-private:
- const string &s;
- size_t pos {0};
-public:
-
-
- explicit Csv (const string &s_arg)
- : s (s_arg)
- {}
-
-
- bool goodPos () const
- { return pos < s. size (); }
- string getWord ();
- // Return: Next word
- // Requires: goodPos()
-private:
- void findChar (char c)
- { while (goodPos () && s [pos] != c)
- pos++;
- }
-};
-
-
-
-StringVector csvLine2vec (const string &line);
- // Invokes: Csv
-
-
-
template
struct Set : set
{
@@ -3188,6 +3188,101 @@ template
+struct StringVector : Vector
+{
+private:
+ typedef Vector P;
+public:
+
+
+ StringVector () = default;
+ explicit StringVector (initializer_list init)
+ : P (init)
+ {}
+ explicit StringVector (const Set &from)  // Copies the elements of from by insertAll()
+ { insertAll (*this, from);
+ searchSorted = true;  // NOTE(review): relies on Set being iterated in sorted order and insertAll() keeping it - confirm
+ }
+ StringVector (const string &fName,
+ size_t reserve_size,
+ bool trimP);
+ StringVector (const string &s,
+ char sep,
+ bool trimP);
+ explicit StringVector (size_t n)
+ : P (n, noString)
+ {}
+
+
+ string toString (const string& sep) const;
+ string toString () const
+ { return toString (noString); }
+ bool same (const StringVector &vec,
+ const Vector &indexes) const;
+ void to_xml (Xml::File &f,
+ const string &tag);
+ // XML: element named by tag with one child element per item: at(0), at(1), ...
+ // Invokes: sort(), clear()
+
+
+ struct Hasher
+ {
+ size_t operator () (const StringVector& vec) const  // Return: XOR of the element hashes, which does not depend on the element order
+ { size_t ret = 0;
+ for (const string& s : vec)
+ ret ^= hash() (s);
+ return ret;
+ }
+ };
+};
+
+
+
+template
+ StringVector set2vec (const set &s)  // Return: getName() of each element of s, in the iteration order of s
+ { StringVector vec; vec. reserve (s. size ());
+ for (const Key* key : s)
+ { assert (key);
+ vec << key->getName ();
+ }
+ return vec;
+ }
+
+
+
+struct Csv
+// Line of Excel .csv-file
+{
+private:
+ const string &s;  // Not owned: the string passed to the constructor must outlive this object
+ size_t pos {0};  // Current position in s
+public:
+
+
+ explicit Csv (const string &s_arg)
+ : s (s_arg)
+ {}
+
+
+ bool goodPos () const
+ { return pos < s. size (); }
+ string getWord ();
+ // Return: Next word
+ // Requires: goodPos()
+private:
+ void findChar (char c)  // Advances pos to the first occurrence of c at or after pos, or to s.size() if there is none
+ { while (goodPos () && s [pos] != c)
+ pos++;
+ }
+};
+
+
+
+StringVector csvLine2vec (const string &line);
+ // Invokes: Csv
+
+
+
template
struct RandomSet
// Set stored in a vector for a random access
@@ -3260,7 +3355,6 @@ template
public:
-
explicit Heap (const CompareInt &comp_arg,
const SetHeapIndex &setHeapIndex_arg = nullptr,
size_t toReserve = 0)
@@ -3372,9 +3466,9 @@ template
// Test
static void testStr ()
- { StringVector vec {"Moscow", "San Diego", "Los Angeles", "Paris"};
- Heap heap (strComp);
- for (string& s : vec)
+ { const StringVector vec {"Moscow", "San Diego", "Los Angeles", "Paris"};
+ Heap heap (strComp);
+ for (const string& s : vec)
heap << & s;
while (! heap. empty ())
{ cout << * heap. getMaximum () << endl;
@@ -3386,8 +3480,8 @@ template
const void* s2)
{ const string& s1_ = * static_cast (s1);
const string& s2_ = * static_cast (s2);
- if (s1_ > s2_) return -1;
if (s1_ < s2_) return 1;
+ if (s1_ > s2_) return -1;
return 0;
}
};
@@ -3453,8 +3547,14 @@ struct Chronometer : Nocopy
bool on () const
{ return enabled && threads_max == 1; }
+ bool started () const  // startTime is set by start() and reset to noclock by stop() and cancel()
+ { return startTime != noclock; }
void start ();
+ // Requires: !started()
void stop ();
+ // Requires: started()
+ void cancel ()  // Ends the current measurement without adding it to time, unlike stop()
+ { startTime = noclock; }
void print (ostream &os) const;
};
@@ -3594,15 +3694,7 @@ struct TextPos
{ return lineNum == other. lineNum
&& charNum == other. charNum;
}
- string str () const
- { return "line " + to_string (lineNum + 1) + ", " +
- (eol ()
- ? "end of line"
- : last ()
- ? "last position"
- : "pos. " + to_string (charNum + 1)
- );
- }
+ string str () const;
void inc (bool eol_arg);
@@ -3616,12 +3708,12 @@ struct TextPos
struct Error : runtime_error
- { Error (const TextPos tp,
- const string &what,
- bool expected = true)
- : runtime_error ((tp. str () + ": " + what + ifS (expected, " is expected")). c_str ())
- {}
- };
+ { Error (const TextPos tp,
+ const string &what,
+ bool expected = true)
+ : runtime_error ((tp. str () + what + ifS (expected, " is expected")). c_str ())
+ {}
+ };
};
@@ -3678,8 +3770,6 @@ struct LineInput : Input
bool nextLine ();
// Output: line
- // Update: tp
- // Invokes: trimTrailing()
bool expectPrefix (const string &prefix,
bool eofAllowed)
{ if (nextLine () && trimPrefix (line, prefix))
@@ -3764,9 +3854,9 @@ struct Token : Root
enum Type { eName
, eDelimiter
, eText
- , eInteger
+ , eInteger // 10-based or 16-based: 0xNNNN...
, eDouble
- , eDateTime
+ , eDateTime // Example: 2018-08-13T16:12:54.487
};
// Valid if !empty()
Type type {eDelimiter};
@@ -3840,6 +3930,9 @@ struct Token : Root
{ *this = Token (); }
+ [[noreturn]] void error (const string &what,
+ bool expected = true) const  // Throws TextPos::Error at the position tp of this token; expected => " is expected" is appended to what
+ { throw TextPos::Error (tp, what, expected); }
static string type2str (Type type)
{ switch (type)
{ case eName: return "name";
@@ -3903,6 +3996,7 @@ struct TokenInput : Root
const bool consecutiveQuotesInText;
// Two quotes encode one quote
Token last;
+ TextPos tp;
public:
@@ -3928,13 +4022,9 @@ struct TokenInput : Root
{}
- [[noreturn]] void error (const Token &wrongToken,
- const string &what,
- bool expected = true) const
- { throw TextPos::Error (wrongToken. tp, what, expected); }
[[noreturn]] void error (const string &what,
bool expected = true) const
- { ci. error (what, expected); }
+ { throw TextPos::Error (tp, what, expected); }
Token get ();
// Return: empty() <=> EOF
@@ -3952,17 +4042,17 @@ struct TokenInput : Root
void get (const string &expected)
{ const Token t (get ());
if (! t. isNameText (expected))
- error (t, Token::type2str (Token::eName) + " " + strQuote (expected));
+ t. error (Token::type2str (Token::eName) + " " + strQuote (expected));
}
void get (int expected)
{ const Token t (get ());
if (! t. isInteger (expected))
- error (t, Token::type2str (Token::eInteger) + " " + to_string (expected));
+ t. error (Token::type2str (Token::eInteger) + " " + to_string (expected));
}
void get (double expected)
{ const Token t (get ());
if (! t. isDouble (expected))
- error (t, Token::type2str (Token::eDouble) + " " + toString (expected));
+ t. error (Token::type2str (Token::eDouble) + " " + toString (expected));
}
void get (char expected)
{ const Token t (get ());
@@ -3970,7 +4060,9 @@ struct TokenInput : Root
error (Token::type2str (Token::eDelimiter) + " " + strQuote (toString (expected), '\''));
}
void setLast (Token &&t)
- { last = std::move (t); }
+ { last = std::move (t);
+ tp = last. tp;
+ }
bool getNext (char expected)
{ Token token (get ());
if (! token. isDelimiter (expected))
@@ -3984,6 +4076,29 @@ struct TokenInput : Root
+struct BraceInput : TokenInput
+{
+ static constexpr char commentC {'#'};  // Passed to the TokenInput constructor (presumably as the comment start character - confirm)
+ static constexpr const char* commentS {"comment"};
+ static constexpr char endChar {';'};
+
+
+ explicit BraceInput (const string &fName)
+ : TokenInput (fName, commentC)
+ {}
+ explicit BraceInput (istream &is_arg)
+ : TokenInput (is_arg, commentC)
+ {}
+
+
+ static string endS ()  // Return: endChar as a one-character string
+ { return string (1, endChar); }
+ void skipComment ();  // Reads '{' and skips all tokens up to the matching '}', nested braces included
+};
+
+
+
+
///////////////////////////////////// Json //////////////////////////////////////////
extern unique_ptr jRoot;
@@ -4639,6 +4754,7 @@ struct Application : Singleton, Root
int run (int argc,
const char* argv []);
// Invokes: body()
+ // if runtime_error then exit(1) else errorExit()
private:
virtual void body () const = 0;
// Invokes: initEnvironment()
@@ -4656,9 +4772,7 @@ struct ShellApplication : Application
string tmp;
// Temporary directory: ($TMPDIR or "/tmp") + "/" + programName + "XXXXXX"
// If log is used then tmp is printed in the log file and the temporary files are not deleted
-private:
- bool tmpCreated {false};
-protected:
+ // !empty() => useTmp
string execDir;
// Ends with '/'
// Physically real directory of the software
@@ -4700,7 +4814,7 @@ struct ShellApplication : Application
string exec2str (const string &cmd,
const string &tmpName,
const string &logFName = noString) const;
- // Return: `cmd > /tmpName && cat /tmpName`
+ // Return: $( cmd > <tmp>/tmpName && cat <tmp>/tmpName )
// Requires: cmd produces one line
string uncompress (const string "edFName,
const string &suffix) const;
diff --git a/common.inc b/common.inc
index b72d4b4..a662b9b 100644
--- a/common.inc
+++ b/common.inc
@@ -73,7 +73,7 @@
#define ERROR_MSG(msg) \
{ if (! std::uncaught_exceptions ()) \
- throw runtime_error (std::string ("\"" __FILE__ "\", line ") + to_string (__LINE__) + ", in " + (FUNC) + (msg)); \
+ throwf (std::string ("\"" __FILE__ "\", line ") + to_string (__LINE__) + ", in " + (FUNC) + (msg)); \
exit (1); \
}
#define ERROR ERROR_MSG ("ERROR")
diff --git a/stxtyper.cpp b/stxtyper.cpp
index 9650c5d..7e365fc 100644
--- a/stxtyper.cpp
+++ b/stxtyper.cpp
@@ -32,6 +32,8 @@
* Dependencies: NCBI BLAST, gunzip (optional)
*
* Release changes:
+* 1.0.27 10/23/2024 PD-5155 "Hierarchy node" with mixed types is <node1>::<node2>
+* 1.0.26 10/22/2024 PD-5085 Change column "Element length" to "Target length"
* 1.0.25 08/16/2024 PD-5085 AMRFinderPlus column names to match MicroBIGG-E
* 1.0.24 08/05/2024 PD-5076 "na" -> "NA"
* 1.0.23 07/29/2024 PD-5064 AMBIGUOUS operon type
@@ -580,7 +582,7 @@ struct Operon
//const double refCoverage = double (al1->getAbsCoverage () + al2->getAbsCoverage ()) / double (refLen) * 100.0;
const size_t alignmentLen = al1->length + al2->length;
const string refAccessions (al1->refAccession + ", " + al2->refAccession);
- const string fam (al1->getGenesymbol () + ", " + al2->getGenesymbol ());
+ const string fam (al1->getGenesymbol () + fusion_infix + al2->getGenesymbol ());
td << na // 1 "Protein identifier"
<< targetName // 2 "Contig id"
<< start // 3 "Start"
@@ -649,7 +651,7 @@ struct Operon
return al1->stxType;
if (al1->stxClass != al2->stxClass)
{
- //return al1->stxClass + "/" + al2->stxClass; ?? // order alphabetically
+ //return al1->stxClass + fusion_infix + al2->stxClass; // order alphabetically
if (al1->stxSuperClass == al2->stxSuperClass)
return al1->stxSuperClass;
return noString;
diff --git a/test/amrfinder_integration.expected b/test/amrfinder_integration.expected
index b447c3b..4e96184 100644
--- a/test/amrfinder_integration.expected
+++ b/test/amrfinder_integration.expected
@@ -1,4 +1,4 @@
-#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Element length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description
+#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description
NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA
NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA
NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA
diff --git a/test/amrfinder_integration2.expected b/test/amrfinder_integration2.expected
index e16e1b5..320ccc2 100644
--- a/test/amrfinder_integration2.expected
+++ b/test/amrfinder_integration2.expected
@@ -1,8 +1,8 @@
-#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Element length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node
-NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c, stxB2a
-NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a, stxA2c
-NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a, stxB1a
-NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 99.15 355 AAG01033.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c, stxB2c
-NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 99.76 410 AAA19623.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c, stxB2c
+#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node
+NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c::stxB2a
+NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a::stxA2c
+NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a::stxB1a
+NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 99.15 355 AAG01033.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c
+NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 99.76 410 AAA19623.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c
NA stx2_stop 694 1653 + stx2_operon stx2 operon with internal stop plus VIRULENCE STX_TYPE STX2 STX2 INTERNAL_STOP 960 91.25 320 AUM09788.1 Shiga toxin stx2h subunit A NA NA stxA2h
-NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 100.00 410 AAA16363.1, AAS07596.1 Shiga toxin stx2c NA NA stxB2c, stxA2a
+NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 100.00 410 AAA16363.1, AAS07596.1 Shiga toxin stx2c NA NA stxB2c::stxA2a
diff --git a/tsv.hpp b/tsv.hpp
index 26af24f..261a024 100644
--- a/tsv.hpp
+++ b/tsv.hpp
@@ -167,6 +167,7 @@ struct TextTable : Named
// size() = number of columns
Vector rows;
// StringVector::size() = header.size()
+ // Values are trim()'ed
typedef size_t ColNum;
// no_index <=> no column
typedef size_t RowNum;
diff --git a/version.txt b/version.txt
index 4a4127c..adb7b04 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.0.25
+1.0.27