Skip to content

Commit

Permalink
#1282 SMARTS load/save errors in Indigo.
Browse files Browse the repository at this point in the history
Fix UT
  • Loading branch information
Aliaksandr Dziarkach committed Sep 21, 2023
1 parent 33e3979 commit d911bb0
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 48 deletions.
2 changes: 1 addition & 1 deletion api/tests/integration/ref/basic/load_structure.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ C1C=CC=CC=1*>>C1C=C(*)C=CC=1
C1C=CC=CC=1* |$;;;;;;A$|

(** #8 **): Smarts, (smarts parameter):
[#8AH]-[c]1-,:[c]-,:[c]-,:[c]-,:[c]-,:[c]-,:1
[O;H]-[c]1-,:[c]-,:[c]-,:[c]-,:[c]-,:[c]-,:1

(** #9 **): Smarts, (query parameter):
[OH]C1:C:C:C:C:C:1
Expand Down
12 changes: 7 additions & 5 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
**** Load and Save as Query ****
[#6]-[#6]-[#6+5]-[#6]-[#6]-[#6]-[#6]-[#6]
[#6]-[#6]-[#6;+5]-[#6]-[#6]-[#6]-[#6]-[#6]
CC[C+5]CCCCC
**** Load and Save as Molecule ****
[#6]-[#6]-[#6+5]-[#6]-[#6]-[#6]-[#6]-[#6]
[#6]-[#6]-[#6;+5]-[#6]-[#6]-[#6]-[#6]-[#6]
CC[C+5]CCCCC
**** Load and Save as Query with not list ****
[#6]-[#6]-[!#5!#6!#7]-[#6]-[#6]-[#6]-[#6]-[#6]-[#6]
**** Load and Save as Query with component-level grouping ****
([#8].[#6]) is ok. smarts_in==smarts_out
([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out
[!C!b] is ok. smarts_in==smarts_out
[!C;!b] is ok. smarts_in==smarts_out
[*] is ok. smarts_in==smarts_out
[*R1] is ok. smarts_in==smarts_out
[*R3] is ok. smarts_in==smarts_out
[*;R1] is ok. smarts_in==smarts_out
[*;R3] is ok. smarts_in==smarts_out
[r] is ok. smarts_in==smarts_out
[r0] is ok. smarts_in==smarts_out
[r1] is ok. smarts_in==smarts_out
Expand All @@ -24,3 +24,5 @@ CC[C+5]CCCCC
[#6]@[#6] is ok. smarts_in==smarts_out
[#9]/[#6] is ok. smarts_in==smarts_out
[#9]/[#6]=[C]/[#17] is ok. smarts_in==smarts_out
[O;H] is ok. smarts_in==smarts_out
[!O;H] is ok. smarts_in==smarts_out
8 changes: 5 additions & 3 deletions api/tests/integration/tests/formats/smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ def test_smarts_load_save(smarts_in):
test_smarts_load_save("([#8].[#6])")
test_smarts_load_save("([#8].[#6]).([#8].[#6])")

test_smarts_load_save("[!C!b]")
test_smarts_load_save("[!C;!b]")
test_smarts_load_save("[*]")
test_smarts_load_save("[*R1]")
test_smarts_load_save("[*R3]")
test_smarts_load_save("[*;R1]")
test_smarts_load_save("[*;R3]")
test_smarts_load_save("[r]")
test_smarts_load_save("[r0]")
test_smarts_load_save("[r1]")
Expand All @@ -113,3 +113,5 @@ def test_smarts_load_save(smarts_in):
test_smarts_load_save("[#6]@[#6]")
test_smarts_load_save("[#9]/[#6]")
test_smarts_load_save("[#9]/[#6]=[C]/[#17]")
test_smarts_load_save("[O;H]")
test_smarts_load_save("[!O;H]")
17 changes: 8 additions & 9 deletions core/indigo-core/common/base_cpp/output.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ namespace indigo

virtual void write(const void* data, int size) = 0;
virtual void flush() = 0;
virtual long long tell() const noexcept
{
return 0;
}

virtual void writeByte(byte value);

Expand All @@ -63,18 +67,13 @@ namespace indigo
void printfCR(const char* format, ...);
};

class DLLEXPORT OutputTell
{
virtual long long tell() const noexcept = 0;
};

class DLLEXPORT OutputSeek
{
virtual void seek(long long offset, int from) = 0;
void skip(int count);
};

class DLLEXPORT FileOutput : public Output, public OutputSeek, public OutputTell
class DLLEXPORT FileOutput : public Output, public OutputSeek
{
public:
FileOutput(Encoding filename_encoding, const char* filename);
Expand All @@ -96,7 +95,7 @@ namespace indigo
FILE* _file;
};

class DLLEXPORT ArrayOutput : public Output, public OutputTell
class DLLEXPORT ArrayOutput : public Output
{
public:
explicit ArrayOutput(Array<char>& arr);
Expand All @@ -111,7 +110,7 @@ namespace indigo
Array<char>& _arr;
};

class DLLEXPORT StringOutput : public Output, public OutputTell
class DLLEXPORT StringOutput : public Output
{
public:
StringOutput() = delete;
Expand All @@ -127,7 +126,7 @@ namespace indigo
std::string& _str;
};

class DLLEXPORT StandardOutput : public Output, public OutputTell
class DLLEXPORT StandardOutput : public Output
{
public:
explicit StandardOutput();
Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/common/gzip/gzip_output.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
namespace indigo
{

class GZipOutput : public Output, OutputTell
class GZipOutput : public Output
{
public:
enum
Expand Down
58 changes: 34 additions & 24 deletions core/indigo-core/molecule/src/smiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -971,42 +971,52 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira
break;
}
case QueryMolecule::OP_AND: {
// Convert a & #6 -> c, A & #6 -> C
if (atom->children.size() == 2 &&
((atom->children[0]->type == QueryMolecule::ATOM_AROMATICITY && atom->children[1]->type == QueryMolecule::ATOM_NUMBER) ||
(atom->children[1]->type == QueryMolecule::ATOM_AROMATICITY && atom->children[0]->type == QueryMolecule::ATOM_NUMBER)))
bool has_number = false;
bool has_aromatic = false;
bool aromatic = false;
char atom_name[10];
int cur_pos = _output.tell();
for (i = 0; i < atom->children.size(); i++)
{
int number = -1;
bool aromatic = false;
char atom_name[10];
if (atom->children[0]->type == QueryMolecule::ATOM_NUMBER)
if (atom->children[i]->type == QueryMolecule::ATOM_NUMBER)
{
number = static_cast<QueryMolecule::Atom*>(atom->children[0])->value_max;
aromatic = static_cast<QueryMolecule::Atom*>(atom->children[1])->value_min == ATOM_AROMATIC;
has_number = true;
strncpy(atom_name, Element::toString(static_cast<QueryMolecule::Atom*>(atom->children[0])->value_max), sizeof(atom_name));
}
else
if (atom->children[i]->type == QueryMolecule::ATOM_AROMATICITY)
{
aromatic = static_cast<QueryMolecule::Atom*>(atom->children[0])->value_min == ATOM_AROMATIC;
number = static_cast<QueryMolecule::Atom*>(atom->children[1])->value_max;
has_aromatic = true;
aromatic = static_cast<QueryMolecule::Atom*>(atom->children[i])->value_min == ATOM_AROMATIC;
}
strncpy(atom_name, Element::toString(number), sizeof(atom_name));
}
if (has_aromatic && has_number)
{ // Convert a & #6 -> c, A & #6 -> C
if (aromatic)
atom_name[0] = tolower(atom_name[0]);
_output.printf("%s", atom_name);
}
else
for (i = 0; i < atom->children.size(); i++)
{
for (i = 0; i < atom->children.size(); i++)
if (has_aromatic && has_number &&
(atom->children[i]->type == QueryMolecule::ATOM_AROMATICITY || atom->children[i]->type == QueryMolecule::ATOM_NUMBER))
{
if (atom->children[i]->type == QueryMolecule::ATOM_RADICAL || atom->children[i]->type == QueryMolecule::ATOM_VALENCE)
{
continue;
}
continue;
}
if (atom->children[i]->type == QueryMolecule::ATOM_RADICAL || atom->children[i]->type == QueryMolecule::ATOM_VALENCE)
{
continue;
}
if (atom->children[i]->type == QueryMolecule::OP_NOT && atom->children[i]->artificial)
{
continue;
}

if (i > 0)
writeAnd(_output, atom, has_or_parent);
_writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, has_or_parent, has_not_parent);
if (_output.tell() > cur_pos)
{
_output.writeChar(has_or_parent ? '&' : ';');
cur_pos = _output.tell();
}
_writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, has_or_parent, has_not_parent);
}
break;
}
Expand Down Expand Up @@ -1187,7 +1197,7 @@ void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_
for (i = 0; i < bond->children.size(); i++)
{
if (i > 0)
writeAnd(_output, bond, has_or_parent);
_output.writeChar(has_or_parent ? '&' : ';');
_writeSmartsBond(idx, (QueryMolecule::Bond*)bond->children[i], has_or_parent);
}
break;
Expand Down
10 changes: 5 additions & 5 deletions core/indigo-core/tests/tests/smarts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,15 @@ TEST_F(IndigoCoreSmartsTest, h)
EXPECT_FALSE(substructureMatch("C", "[*h5]"));
EXPECT_FALSE(substructureMatch("C([H])([H])([H])[H]", "[Ch]"));

EXPECT_STREQ(smilesLoadSaveLoad("[#6;h]", true).c_str(), "[#6h]");
EXPECT_STREQ(smilesLoadSaveLoad("[#6;h2]", true).c_str(), "[#6h2]");
EXPECT_STREQ(smilesLoadSaveLoad("[#6;h]", true).c_str(), "[#6;h]");
EXPECT_STREQ(smilesLoadSaveLoad("[#6;h2]", true).c_str(), "[#6;h2]");
}

TEST_F(IndigoCoreSmartsTest, connectivity)
{
EXPECT_STREQ(smilesLoadSaveLoad("[#7;X3]", true).c_str(), "[#7X3]");
EXPECT_STREQ(smilesLoadSaveLoad("[#7X3]", true).c_str(), "[#7X3]");
EXPECT_STREQ(smilesLoadSaveLoad("[NX3]", true).c_str(), "[#7X3]");
EXPECT_STREQ(smilesLoadSaveLoad("[#7;X3]", true).c_str(), "[#7;X3]");
EXPECT_STREQ(smilesLoadSaveLoad("[#7X3]", true).c_str(), "[#7;X3]");
EXPECT_STREQ(smilesLoadSaveLoad("[NX3]", true).c_str(), "[#7;X3]");
}

TEST_F(IndigoCoreSmartsTest, aliases)
Expand Down

0 comments on commit d911bb0

Please sign in to comment.