From d911bb063d6ce66afc73f5fd874657af1f9ea549 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach Date: Thu, 21 Sep 2023 20:13:51 +0300 Subject: [PATCH] #1282 SMARTS load/save errors in Indigo. Fix UT --- .../ref/basic/load_structure.py.out | 2 +- .../integration/ref/formats/smarts.py.out | 12 ++-- api/tests/integration/tests/formats/smarts.py | 8 ++- core/indigo-core/common/base_cpp/output.h | 17 +++--- core/indigo-core/common/gzip/gzip_output.h | 2 +- .../indigo-core/molecule/src/smiles_saver.cpp | 58 +++++++++++-------- core/indigo-core/tests/tests/smarts.cpp | 10 ++-- 7 files changed, 61 insertions(+), 48 deletions(-) diff --git a/api/tests/integration/ref/basic/load_structure.py.out b/api/tests/integration/ref/basic/load_structure.py.out index 32a7d94091..cd0a855d06 100644 --- a/api/tests/integration/ref/basic/load_structure.py.out +++ b/api/tests/integration/ref/basic/load_structure.py.out @@ -26,7 +26,7 @@ C1C=CC=CC=1*>>C1C=C(*)C=CC=1 C1C=CC=CC=1* |$;;;;;;A$| (** #8 **): Smarts, (smarts parameter): -[#8AH]-[c]1-,:[c]-,:[c]-,:[c]-,:[c]-,:[c]-,:1 +[O;H]-[c]1-,:[c]-,:[c]-,:[c]-,:[c]-,:[c]-,:1 (** #9 **): Smarts, (query parameter): [OH]C1:C:C:C:C:C:1 diff --git a/api/tests/integration/ref/formats/smarts.py.out b/api/tests/integration/ref/formats/smarts.py.out index 46e73cc9c1..8c9ad54b94 100644 --- a/api/tests/integration/ref/formats/smarts.py.out +++ b/api/tests/integration/ref/formats/smarts.py.out @@ -1,18 +1,18 @@ **** Load and Save as Query **** -[#6]-[#6]-[#6+5]-[#6]-[#6]-[#6]-[#6]-[#6] +[#6]-[#6]-[#6;+5]-[#6]-[#6]-[#6]-[#6]-[#6] CC[C+5]CCCCC **** Load and Save as Molecule **** -[#6]-[#6]-[#6+5]-[#6]-[#6]-[#6]-[#6]-[#6] +[#6]-[#6]-[#6;+5]-[#6]-[#6]-[#6]-[#6]-[#6] CC[C+5]CCCCC **** Load and Save as Query with not list **** [#6]-[#6]-[!#5!#6!#7]-[#6]-[#6]-[#6]-[#6]-[#6]-[#6] **** Load and Save as Query with component-level grouping **** ([#8].[#6]) is ok. smarts_in==smarts_out ([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out -[!C!b] is ok. smarts_in==smarts_out +[!C;!b] is ok. smarts_in==smarts_out [*] is ok. smarts_in==smarts_out -[*R1] is ok. smarts_in==smarts_out -[*R3] is ok. smarts_in==smarts_out +[*;R1] is ok. smarts_in==smarts_out +[*;R3] is ok. smarts_in==smarts_out [r] is ok. smarts_in==smarts_out [r0] is ok. smarts_in==smarts_out [r1] is ok. smarts_in==smarts_out @@ -24,3 +24,5 @@ CC[C+5]CCCCC [#6]@[#6] is ok. smarts_in==smarts_out [#9]/[#6] is ok. smarts_in==smarts_out [#9]/[#6]=[C]/[#17] is ok. smarts_in==smarts_out +[O;H] is ok. smarts_in==smarts_out +[!O;H] is ok. smarts_in==smarts_out diff --git a/api/tests/integration/tests/formats/smarts.py b/api/tests/integration/tests/formats/smarts.py index bc1fe6096d..b421fd6801 100755 --- a/api/tests/integration/tests/formats/smarts.py +++ b/api/tests/integration/tests/formats/smarts.py @@ -98,10 +98,10 @@ def test_smarts_load_save(smarts_in): test_smarts_load_save("([#8].[#6])") test_smarts_load_save("([#8].[#6]).([#8].[#6])") -test_smarts_load_save("[!C!b]") +test_smarts_load_save("[!C;!b]") test_smarts_load_save("[*]") -test_smarts_load_save("[*R1]") -test_smarts_load_save("[*R3]") +test_smarts_load_save("[*;R1]") +test_smarts_load_save("[*;R3]") test_smarts_load_save("[r]") test_smarts_load_save("[r0]") test_smarts_load_save("[r1]") @@ -113,3 +113,5 @@ def test_smarts_load_save(smarts_in): test_smarts_load_save("[#6]@[#6]") test_smarts_load_save("[#9]/[#6]") test_smarts_load_save("[#9]/[#6]=[C]/[#17]") +test_smarts_load_save("[O;H]") +test_smarts_load_save("[!O;H]") diff --git a/core/indigo-core/common/base_cpp/output.h b/core/indigo-core/common/base_cpp/output.h index 2ae6679a0a..be40558b23 100644 --- a/core/indigo-core/common/base_cpp/output.h +++ b/core/indigo-core/common/base_cpp/output.h @@ -41,6 +41,10 @@ namespace indigo virtual void write(const void* data, int size) = 0; virtual void flush() = 0; + virtual long long tell() const noexcept + { + return 0; + } virtual void writeByte(byte value); @@ -63,18 +67,13 @@ namespace indigo void printfCR(const char* format, ...); }; - class DLLEXPORT OutputTell - { - virtual long long tell() const noexcept = 0; - }; - class DLLEXPORT OutputSeek { virtual void seek(long long offset, int from) = 0; void skip(int count); }; - class DLLEXPORT FileOutput : public Output, public OutputSeek, public OutputTell + class DLLEXPORT FileOutput : public Output, public OutputSeek { public: FileOutput(Encoding filename_encoding, const char* filename); @@ -96,7 +95,7 @@ namespace indigo FILE* _file; }; - class DLLEXPORT ArrayOutput : public Output, public OutputTell + class DLLEXPORT ArrayOutput : public Output { public: explicit ArrayOutput(Array& arr); @@ -111,7 +110,7 @@ namespace indigo Array& _arr; }; - class DLLEXPORT StringOutput : public Output, public OutputTell + class DLLEXPORT StringOutput : public Output { public: StringOutput() = delete; @@ -127,7 +126,7 @@ namespace indigo std::string& _str; }; - class DLLEXPORT StandardOutput : public Output, public OutputTell + class DLLEXPORT StandardOutput : public Output { public: explicit StandardOutput(); diff --git a/core/indigo-core/common/gzip/gzip_output.h b/core/indigo-core/common/gzip/gzip_output.h index d5653fa79e..6101e531c1 100644 --- a/core/indigo-core/common/gzip/gzip_output.h +++ b/core/indigo-core/common/gzip/gzip_output.h @@ -27,7 +27,7 @@ namespace indigo { - class GZipOutput : public Output, OutputTell + class GZipOutput : public Output { public: enum diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index 18fef61047..efa5a69d3a 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -971,42 +971,52 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira break; } case QueryMolecule::OP_AND: { - // Convert a & #6 -> c, A & #6 -> C - if (atom->children.size() == 2 && - ((atom->children[0]->type == QueryMolecule::ATOM_AROMATICITY && atom->children[1]->type == QueryMolecule::ATOM_NUMBER) || - (atom->children[1]->type == QueryMolecule::ATOM_AROMATICITY && atom->children[0]->type == QueryMolecule::ATOM_NUMBER))) + bool has_number = false; + bool has_aromatic = false; + bool aromatic = false; + char atom_name[10]; + int cur_pos = _output.tell(); + for (i = 0; i < atom->children.size(); i++) { - int number = -1; - bool aromatic = false; - char atom_name[10]; - if (atom->children[0]->type == QueryMolecule::ATOM_NUMBER) + if (atom->children[i]->type == QueryMolecule::ATOM_NUMBER) { - number = static_cast(atom->children[0])->value_max; - aromatic = static_cast(atom->children[1])->value_min == ATOM_AROMATIC; + has_number = true; + strncpy(atom_name, Element::toString(static_cast(atom->children[0])->value_max), sizeof(atom_name)); } - else + if (atom->children[i]->type == QueryMolecule::ATOM_AROMATICITY) { - aromatic = static_cast(atom->children[0])->value_min == ATOM_AROMATIC; - number = static_cast(atom->children[1])->value_max; + has_aromatic = true; + aromatic = static_cast(atom->children[i])->value_min == ATOM_AROMATIC; } - strncpy(atom_name, Element::toString(number), sizeof(atom_name)); + } + if (has_aromatic && has_number) + { // Convert a & #6 -> c, A & #6 -> C if (aromatic) atom_name[0] = tolower(atom_name[0]); _output.printf("%s", atom_name); } - else + for (i = 0; i < atom->children.size(); i++) { - for (i = 0; i < atom->children.size(); i++) + if (has_aromatic && has_number && + (atom->children[i]->type == QueryMolecule::ATOM_AROMATICITY || atom->children[i]->type == QueryMolecule::ATOM_NUMBER)) { - if (atom->children[i]->type == QueryMolecule::ATOM_RADICAL || atom->children[i]->type == QueryMolecule::ATOM_VALENCE) - { - continue; - } + continue; + } + if (atom->children[i]->type == QueryMolecule::ATOM_RADICAL || atom->children[i]->type == QueryMolecule::ATOM_VALENCE) + { + continue; + } + if (atom->children[i]->type == QueryMolecule::OP_NOT && atom->children[i]->artificial) + { + continue; + } - if (i > 0) - writeAnd(_output, atom, has_or_parent); - _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, has_or_parent, has_not_parent); + if (_output.tell() > cur_pos) + { + _output.writeChar(has_or_parent ? '&' : ';'); + cur_pos = _output.tell(); } + _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, has_or_parent, has_not_parent); } break; } @@ -1187,7 +1197,7 @@ void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_ for (i = 0; i < bond->children.size(); i++) { if (i > 0) - writeAnd(_output, bond, has_or_parent); + _output.writeChar(has_or_parent ? '&' : ';'); _writeSmartsBond(idx, (QueryMolecule::Bond*)bond->children[i], has_or_parent); } break; diff --git a/core/indigo-core/tests/tests/smarts.cpp b/core/indigo-core/tests/tests/smarts.cpp index 61e85b7d72..d5d3d17d75 100644 --- a/core/indigo-core/tests/tests/smarts.cpp +++ b/core/indigo-core/tests/tests/smarts.cpp @@ -114,15 +114,15 @@ TEST_F(IndigoCoreSmartsTest, h) EXPECT_FALSE(substructureMatch("C", "[*h5]")); EXPECT_FALSE(substructureMatch("C([H])([H])([H])[H]", "[Ch]")); - EXPECT_STREQ(smilesLoadSaveLoad("[#6;h]", true).c_str(), "[#6h]"); - EXPECT_STREQ(smilesLoadSaveLoad("[#6;h2]", true).c_str(), "[#6h2]"); + EXPECT_STREQ(smilesLoadSaveLoad("[#6;h]", true).c_str(), "[#6;h]"); + EXPECT_STREQ(smilesLoadSaveLoad("[#6;h2]", true).c_str(), "[#6;h2]"); } TEST_F(IndigoCoreSmartsTest, connectivity) { - EXPECT_STREQ(smilesLoadSaveLoad("[#7;X3]", true).c_str(), "[#7X3]"); - EXPECT_STREQ(smilesLoadSaveLoad("[#7X3]", true).c_str(), "[#7X3]"); - EXPECT_STREQ(smilesLoadSaveLoad("[NX3]", true).c_str(), "[#7X3]"); + EXPECT_STREQ(smilesLoadSaveLoad("[#7;X3]", true).c_str(), "[#7;X3]"); + EXPECT_STREQ(smilesLoadSaveLoad("[#7X3]", true).c_str(), "[#7;X3]"); + EXPECT_STREQ(smilesLoadSaveLoad("[NX3]", true).c_str(), "[#7;X3]"); } TEST_F(IndigoCoreSmartsTest, aliases)