Skip to content

Commit

Permalink
#1254 SMARTS with component-level grouping saved without '()' (#1266)
Browse files Browse the repository at this point in the history
Co-authored-by: Aliaksandr Dziarkach <[email protected]>
  • Loading branch information
AliaksandrDziarkach and Aliaksandr Dziarkach authored Sep 14, 2023
1 parent ffabd04 commit 88a4ad6
Show file tree
Hide file tree
Showing 12 changed files with 152 additions and 33 deletions.
4 changes: 2 additions & 2 deletions api/cpp/tests/basic/reaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ TEST(Reaction, automapReactionSmarts)
const auto& session = IndigoSession::create();
auto reaction = session->loadReaction("[C:1]=[C:2][C:3].[C:4]=[C:5][C:6]>>[C:3][C:2]=[C:5][C:6]");
reaction.automap(IndigoAutomapMode::CLEAR);
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |^3:0,3,^1:1,4,7,8|", reaction.smarts().c_str());
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", reaction.smarts().c_str());
}

TEST(Reaction, automapQueryReactionSmarts)
{
const auto& session = IndigoSession::create();
auto reaction = session->loadQueryReaction("[C:1]=[C:2][C:3].[C:4]=[C:5][C:6]>>[C:3][C:2]=[C:5][C:6] |$;;R1;;;R2;R1;;;R2$|");
reaction.automap(IndigoAutomapMode::CLEAR);
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |$;;R1;;;R2;R1;;;R2$|", reaction.smarts().c_str());
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", reaction.smarts().c_str());
}
3 changes: 3 additions & 0 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ CC[C+5]CCCCC
CC[C+5]CCCCC
**** Load and Save as Query with not list ****
[#6]-[#6]-[!#5!#6!#7]-[#6]-[#6]-[#6]-[#6]-[#6]-[#6]
**** Load and Save as Query with component-level grouping ****
([#8].[#6]) is ok. smarts_in==smarts_out
([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out
1 change: 1 addition & 0 deletions api/tests/integration/ref/rsmiles/rsmiles_smarts.py.out
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
SMARTS component-level grouping load ok
SMARTS component-level grouping save ok
15 changes: 15 additions & 0 deletions api/tests/integration/tests/formats/smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ def testSmarts(m):
print(m.smiles())


def test_smarts_load_save(smarts_in):
m = indigo.loadSmarts(smarts_in)
smarts_out = m.smarts()
if smarts_in == smarts_out:
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s", smarts_in)
print("smarts_out=%s", smarts_out)


molstr = """
Ketcher 11241617102D 1 1.00000 0.00000 0
Expand Down Expand Up @@ -81,3 +92,7 @@ def testSmarts(m):
print("**** Load and Save as Query with not list ****")
m = indigo.loadQueryMolecule(notlist)
print(m.smarts())

print("**** Load and Save as Query with component-level grouping ****")
test_smarts_load_save("([#8].[#6])")
test_smarts_load_save("([#8].[#6]).([#8].[#6])")
20 changes: 15 additions & 5 deletions api/tests/integration/tests/rsmiles/rsmiles_smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@

indigo = Indigo()


rxn1 = indigo.loadReactionSmarts("([#8:1].[#6:2])>>([#8:1].[#6:2])")
assert rxn1.countReactants() == 1
assert rxn1.countProducts() == 1
print("SMARTS component-level grouping load ok")
smarts_in = "([#8:1].[#6:2])>>([#8:1].[#6:2])"
rxn1 = indigo.loadReactionSmarts(smarts_in)
if rxn1.countReactants() == 1 and rxn1.countProducts() == 1:
print("SMARTS component-level grouping load ok")
else:
print("SMARTS component-level grouping load failed")
print("rxn1.countReactants()=%s" % rxn1.countReactants())
print("rxn1.countProducts()=%s" % rxn1.countProducts())
smarts_out = rxn1.smarts()
if smarts_in == smarts_out:
print("SMARTS component-level grouping save ok")
else:
print("SMARTS component-level grouping save failed")
print("smart_in=%s" % smarts_in)
print("smart_ou=%s" % smarts_out)
2 changes: 2 additions & 0 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ namespace indigo
// must belong to different connected components of the target molecule
Array<int> components;

void getComponentNeighbors(std::list<std::unordered_set<int>>& componentNeighbors);

void invalidateAtom(int index, int mask) override;

int getAtomMaxExteralConnectivity(int idx);
Expand Down
20 changes: 20 additions & 0 deletions core/indigo-core/molecule/src/query_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2140,3 +2140,23 @@ void QueryMolecule::getQueryAtomLabel(int qa, Array<char>& result)
if (it != query_atom_labels.end())
result.readString(it->second.c_str(), true);
}

void QueryMolecule::getComponentNeighbors(std::list<std::unordered_set<int>>& componentNeighbors)
{
std::unordered_map<int, std::unordered_set<int>> componentAtoms;
for (int i = 0; i < components.size(); ++i)
{
int componentId = components[i];
if (componentId > 0)
{ // vertice[i] belongs to component #Id
componentAtoms[componentId].insert(i);
}
}
componentNeighbors.clear();
for (auto elem : componentAtoms)
{
auto atoms = elem.second;
if (atoms.size() > 1)
componentNeighbors.emplace_back(atoms);
}
}
64 changes: 62 additions & 2 deletions core/indigo-core/molecule/src/smiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,47 @@ void SmilesSaver::_saveMolecule()
walk.walk();

const Array<DfsWalk::SeqElem>& v_seq = walk.getSequence();
Array<int> v_to_comp_group;
v_to_comp_group.resize(v_seq.size());
v_to_comp_group.fffill();

if (_qmol != nullptr && smarts_mode && _qmol->components.size() >= v_seq.size())
{
if (v_seq.size() < 1)
return; // No atoms to save
std::set<int> components;
int cur_component = -1;
for (int i = 0; i < v_seq.size(); ++i)
{
// In v_seq each fragment started with vertex which parent == -1
// In SMARTS some fragments could be grouped (component-level grouping)
// In QueryMolecule group number stored in "".components" member. GroupId == 0 means no group defined.
// Each fragment - connected graph, so all vertexes should belong to one group.
// All group fragments should go one by one - in SMARTS its inside "()".
if (v_seq[i].parent_vertex < 0) // New Fragment
{
int new_component = _qmol->components[v_seq[i].idx];
// if component defined for new fragment(id>0) and its different from previous and seen before
if (new_component > 0 && new_component != cur_component && components.count(new_component))
{
// According to the DfsWalk code, the groups components should be neighbors.
// If will be found case when it wrong - add code to rearrange fragments
throw Error("SMARTS fragments need to reaarange.");
}
components.emplace(new_component);
cur_component = new_component;
}
else
{
if (cur_component != _qmol->components[v_seq[i].idx])
{
// Fragment contains atoms from different components - something went wrong
throw Error("Fragment contains atoms from different components.");
}
}
v_to_comp_group[i] = cur_component;
}
}

// fill up neighbor lists for the stereocenters calculation
for (i = 0; i < v_seq.size(); i++)
Expand Down Expand Up @@ -525,8 +566,25 @@ void SmilesSaver::_saveMolecule()
else
{
if (!first_component)
{
// group == 0 means no group set.
int prev_group = v_to_comp_group[i - 1];
int new_group = v_to_comp_group[i];
bool different_groups = new_group != prev_group;
if (smarts_mode && prev_group && different_groups) // if component group ended
_output.writeChar(')');

_output.writeChar('.');
first_component = false;

if (smarts_mode && new_group && different_groups) // if new group started
_output.writeChar('(');
}
else
{
if (smarts_mode && v_to_comp_group[i] > 0) // component level grouping set for this fragment
_output.writeChar('(');
first_component = false;
}
_written_components++;
}
if (write_atom)
Expand Down Expand Up @@ -599,8 +657,10 @@ void SmilesSaver::_saveMolecule()
_output.writeString("{+n}");
}
}
if (smarts_mode && v_to_comp_group[i - 1] > 0) // if group set for last fragment - add finish )
_output.writeChar(')');

if (write_extra_info && chemaxon)
if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS
{
// Before we write the |...| block (ChemAxon's Extended SMILES),
// we must clean up the mess we did with the attachment points
Expand Down
25 changes: 3 additions & 22 deletions core/indigo-core/reaction/src/rsmiles_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,25 +75,6 @@ void RSmilesLoader::loadQueryReaction(QueryReaction& rxn)
_loadReaction();
}

static void _createComponenetsExternalNeighbors(QueryMolecule& qmol, std::list<std::unordered_set<int>>& externNeib)
{
std::unordered_map<int, std::unordered_set<int>> componentAtoms;
for (int i = 0; i < qmol.components.size(); ++i)
{
int componentId = qmol.components[i];
if (componentId > 0)
{ // vertice[i] belongs to component #Id
componentAtoms[componentId].insert(i);
}
}
for (auto elem : componentAtoms)
{
auto atoms = elem.second;
if (atoms.size() > 1)
externNeib.emplace_back(atoms);
}
}

void RSmilesLoader::_loadReaction()
{
_brxn->clear();
Expand Down Expand Up @@ -147,7 +128,7 @@ void RSmilesLoader::_loadReaction()
{
rcnt = std::make_unique<QueryMolecule>();
r_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*rcnt));
_createComponenetsExternalNeighbors(static_cast<QueryMolecule&>(*rcnt), rcnt_extNeibs);
static_cast<QueryMolecule&>(*rcnt).getComponentNeighbors(rcnt_extNeibs);
}
rcnt_aam.copy(rcnt->reaction_atom_mapping);

Expand Down Expand Up @@ -181,7 +162,7 @@ void RSmilesLoader::_loadReaction()
{
ctlt = std::make_unique<QueryMolecule>();
c_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*ctlt));
_createComponenetsExternalNeighbors(static_cast<QueryMolecule&>(*ctlt), ctlt_extNeibs);
static_cast<QueryMolecule&>(*ctlt).getComponentNeighbors(ctlt_extNeibs);
}
ctlt_aam.copy(ctlt->reaction_atom_mapping);

Expand Down Expand Up @@ -221,7 +202,7 @@ void RSmilesLoader::_loadReaction()
{
prod = std::make_unique<QueryMolecule>();
p_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*prod));
_createComponenetsExternalNeighbors(static_cast<QueryMolecule&>(*prod), prod_extNeibs);
static_cast<QueryMolecule&>(*prod).getComponentNeighbors(prod_extNeibs);
}
prod_aam.copy(prod->reaction_atom_mapping);

Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/reaction/src/rsmiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void RSmilesSaver::_saveReaction()
_writeMolecule(i);
}

if (chemaxon)
if (chemaxon && !smarts_mode)
{
_comma = false;
_writeFragmentsInfo();
Expand Down
18 changes: 18 additions & 0 deletions core/indigo-core/tests/tests/formats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,3 +323,21 @@ M END
saver.saveMolecule(t_mol);
ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0);
}

TEST_F(IndigoCoreFormatsTest, smarts_load_save)
{
QueryMolecule q_mol;

std::string smarts_in{"([#8].[#6]).([#6].[#8])"};
BufferScanner scanner(smarts_in.c_str());
SmilesLoader loader(scanner);
loader.smarts_mode = true;
loader.loadQueryMolecule(q_mol);
Array<char> out;
ArrayOutput std_out(out);
SmilesSaver saver(std_out);
saver.smarts_mode = true;
saver.saveQueryMolecule(q_mol);
std::string smarts_out{out.ptr(), static_cast<std::size_t>(out.size())};
ASSERT_EQ(smarts_in, smarts_out);
}
11 changes: 10 additions & 1 deletion core/indigo-core/tests/tests/reaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <base_cpp/output.h>
#include <reaction/reaction.h>
#include <reaction/rsmiles_saver.h>

#include "common.h"

Expand Down Expand Up @@ -65,7 +66,7 @@ TEST_F(IndigoCoreReactionTest, aliases_complex)
QueryReaction reaction;
loadQueryReaction("[#6:1]=[#6:2][#6:3].[#6:4]=[#6:5][#6:6]>>[#6:3][#6:2]=[#6:5][#6:6] |$;;R1;;;R2;R1;;;R2$|", reaction);
reaction.clearAAM();
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |$;;R1;;;R2;R1;;;R2$|", saveReactionSmiles(reaction, true).c_str());
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", saveReactionSmiles(reaction, true).c_str());
ASSERT_STREQ("$RXN\n\n -INDIGO- 0100000000\n\n 2 1\n$MOL\n\n -INDIGO-01000000002D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 "
"0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 0.0000 "
" 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0 0 0 0\n 2 3 1 0 0 0 0\nA 3\nR1\nM END\n$MOL\n\n "
Expand Down Expand Up @@ -99,4 +100,12 @@ TEST_F(IndigoCoreReactionTest, smarts_reaction)
loadQueryReaction(smarts_in.c_str(), qr);
ASSERT_EQ(qr.reactantsCount(), 1);
ASSERT_EQ(qr.productsCount(), 1);
Array<char> out;
ArrayOutput std_out(out);
RSmilesSaver saver(std_out);
saver.smarts_mode = true;
saver.saveQueryReaction(qr);
out.push(0);
std::string smarts_out{out.ptr()};
ASSERT_EQ(smarts_in, smarts_out);
}

0 comments on commit 88a4ad6

Please sign in to comment.