Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1254 SMARTS with component-level grouping saved without '()' #1266

Merged
merged 12 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions api/cpp/tests/basic/reaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ TEST(Reaction, automapReactionSmarts)
const auto& session = IndigoSession::create();
auto reaction = session->loadReaction("[C:1]=[C:2][C:3].[C:4]=[C:5][C:6]>>[C:3][C:2]=[C:5][C:6]");
reaction.automap(IndigoAutomapMode::CLEAR);
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |^3:0,3,^1:1,4,7,8|", reaction.smarts().c_str());
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", reaction.smarts().c_str());
}

TEST(Reaction, automapQueryReactionSmarts)
{
const auto& session = IndigoSession::create();
auto reaction = session->loadQueryReaction("[C:1]=[C:2][C:3].[C:4]=[C:5][C:6]>>[C:3][C:2]=[C:5][C:6] |$;;R1;;;R2;R1;;;R2$|");
reaction.automap(IndigoAutomapMode::CLEAR);
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |$;;R1;;;R2;R1;;;R2$|", reaction.smarts().c_str());
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", reaction.smarts().c_str());
}
3 changes: 3 additions & 0 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ CC[C+5]CCCCC
CC[C+5]CCCCC
**** Load and Save as Query with not list ****
[#6]-[#6]-[!#5!#6!#7]-[#6]-[#6]-[#6]-[#6]-[#6]-[#6]
**** Load and Save as Query with component-level grouping ****
([#8].[#6]) is ok. smarts_in==smarts_out
([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out
1 change: 1 addition & 0 deletions api/tests/integration/ref/rsmiles/rsmiles_smarts.py.out
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
SMARTS component-level grouping load ok
SMARTS component-level grouping save ok
15 changes: 15 additions & 0 deletions api/tests/integration/tests/formats/smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ def testSmarts(m):
print(m.smiles())


def test_smarts_load_save(smarts_in):
m = indigo.loadSmarts(smarts_in)
smarts_out = m.smarts()
if smarts_in == smarts_out:
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s", smarts_in)
print("smarts_out=%s", smarts_out)


molstr = """
Ketcher 11241617102D 1 1.00000 0.00000 0

Expand Down Expand Up @@ -81,3 +92,7 @@ def testSmarts(m):
print("**** Load and Save as Query with not list ****")
m = indigo.loadQueryMolecule(notlist)
print(m.smarts())

print("**** Load and Save as Query with component-level grouping ****")
test_smarts_load_save("([#8].[#6])")
test_smarts_load_save("([#8].[#6]).([#8].[#6])")
20 changes: 15 additions & 5 deletions api/tests/integration/tests/rsmiles/rsmiles_smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@

indigo = Indigo()


rxn1 = indigo.loadReactionSmarts("([#8:1].[#6:2])>>([#8:1].[#6:2])")
assert rxn1.countReactants() == 1
assert rxn1.countProducts() == 1
print("SMARTS component-level grouping load ok")
smarts_in = "([#8:1].[#6:2])>>([#8:1].[#6:2])"
rxn1 = indigo.loadReactionSmarts(smarts_in)
if rxn1.countReactants() == 1 and rxn1.countProducts() == 1:
print("SMARTS component-level grouping load ok")
else:
print("SMARTS component-level grouping load failed")
print("rxn1.countReactants()=%s" % rxn1.countReactants())
print("rxn1.countProducts()=%s" % rxn1.countProducts())
smarts_out = rxn1.smarts()
if smarts_in == smarts_out:
print("SMARTS component-level grouping save ok")
else:
print("SMARTS component-level grouping save failed")
print("smart_in=%s" % smarts_in)
print("smart_ou=%s" % smarts_out)
2 changes: 2 additions & 0 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ namespace indigo
// must belong to different connected components of the target molecule
Array<int> components;

void getComponentNeighbors(std::list<std::unordered_set<int>>& componentNeighbors);

void invalidateAtom(int index, int mask) override;

int getAtomMaxExteralConnectivity(int idx);
Expand Down
20 changes: 20 additions & 0 deletions core/indigo-core/molecule/src/query_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2140,3 +2140,23 @@ void QueryMolecule::getQueryAtomLabel(int qa, Array<char>& result)
if (it != query_atom_labels.end())
result.readString(it->second.c_str(), true);
}

void QueryMolecule::getComponentNeighbors(std::list<std::unordered_set<int>>& componentNeighbors)
{
std::unordered_map<int, std::unordered_set<int>> componentAtoms;
for (int i = 0; i < components.size(); ++i)
{
int componentId = components[i];
if (componentId > 0)
{ // vertice[i] belongs to component #Id
componentAtoms[componentId].insert(i);
}
}
componentNeighbors.clear();
for (auto elem : componentAtoms)
{
auto atoms = elem.second;
if (atoms.size() > 1)
componentNeighbors.emplace_back(atoms);
}
}
64 changes: 62 additions & 2 deletions core/indigo-core/molecule/src/smiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,47 @@ void SmilesSaver::_saveMolecule()
walk.walk();

const Array<DfsWalk::SeqElem>& v_seq = walk.getSequence();
Array<int> v_to_comp_group;
v_to_comp_group.resize(v_seq.size());
v_to_comp_group.fffill();

if (_qmol != nullptr && smarts_mode && _qmol->components.size() >= v_seq.size())
{
if (v_seq.size() < 1)
return; // No atoms to save
std::set<int> components;
int cur_component = -1;
for (int i = 0; i < v_seq.size(); ++i)
{
// In v_seq each fragment started with vertex which parent == -1
// In SMARTS some fragments could be grouped (component-level grouping)
// In QueryMolecule group number stored in "".components" member. GroupId == 0 means no group defined.
// Each fragment - connected graph, so all vertexes should belong to one group.
// All group fragments should go one by one - in SMARTS its inside "()".
if (v_seq[i].parent_vertex < 0) // New Fragment
{
int new_component = _qmol->components[v_seq[i].idx];
// if component defined for new fragment(id>0) and its different from previous and seen before
if (new_component > 0 && new_component != cur_component && components.count(new_component))
{
// According to the DfsWalk code, the groups components should be neighbors.
// If will be found case when it wrong - add code to rearrange fragments
throw Error("SMARTS fragments need to reaarange.");
}
components.emplace(new_component);
cur_component = new_component;
}
else
{
if (cur_component != _qmol->components[v_seq[i].idx])
{
// Fragment contains atoms from different components - something went wrong
throw Error("Fragment contains atoms from different components.");
}
}
v_to_comp_group[i] = cur_component;
}
}

// fill up neighbor lists for the stereocenters calculation
for (i = 0; i < v_seq.size(); i++)
Expand Down Expand Up @@ -525,8 +566,25 @@ void SmilesSaver::_saveMolecule()
else
{
if (!first_component)
{
// group == 0 means no group set.
int prev_group = v_to_comp_group[i - 1];
int new_group = v_to_comp_group[i];
bool different_groups = new_group != prev_group;
if (smarts_mode && prev_group && different_groups) // if component group ended
_output.writeChar(')');

_output.writeChar('.');
first_component = false;

if (smarts_mode && new_group && different_groups) // if new group started
_output.writeChar('(');
}
else
{
if (smarts_mode && v_to_comp_group[i] > 0) // component level grouping set for this fragment
_output.writeChar('(');
first_component = false;
}
_written_components++;
}
if (write_atom)
Expand Down Expand Up @@ -599,8 +657,10 @@ void SmilesSaver::_saveMolecule()
_output.writeString("{+n}");
}
}
if (smarts_mode && v_to_comp_group[i - 1] > 0) // if group set for last fragment - add finish )
_output.writeChar(')');

if (write_extra_info && chemaxon)
if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS
{
// Before we write the |...| block (ChemAxon's Extended SMILES),
// we must clean up the mess we did with the attachment points
Expand Down
25 changes: 3 additions & 22 deletions core/indigo-core/reaction/src/rsmiles_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,25 +75,6 @@ void RSmilesLoader::loadQueryReaction(QueryReaction& rxn)
_loadReaction();
}

static void _createComponenetsExternalNeighbors(QueryMolecule& qmol, std::list<std::unordered_set<int>>& externNeib)
{
std::unordered_map<int, std::unordered_set<int>> componentAtoms;
for (int i = 0; i < qmol.components.size(); ++i)
{
int componentId = qmol.components[i];
if (componentId > 0)
{ // vertice[i] belongs to component #Id
componentAtoms[componentId].insert(i);
}
}
for (auto elem : componentAtoms)
{
auto atoms = elem.second;
if (atoms.size() > 1)
externNeib.emplace_back(atoms);
}
}

void RSmilesLoader::_loadReaction()
{
_brxn->clear();
Expand Down Expand Up @@ -147,7 +128,7 @@ void RSmilesLoader::_loadReaction()
{
rcnt = std::make_unique<QueryMolecule>();
r_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*rcnt));
_createComponenetsExternalNeighbors(static_cast<QueryMolecule&>(*rcnt), rcnt_extNeibs);
static_cast<QueryMolecule&>(*rcnt).getComponentNeighbors(rcnt_extNeibs);
}
rcnt_aam.copy(rcnt->reaction_atom_mapping);

Expand Down Expand Up @@ -181,7 +162,7 @@ void RSmilesLoader::_loadReaction()
{
ctlt = std::make_unique<QueryMolecule>();
c_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*ctlt));
_createComponenetsExternalNeighbors(static_cast<QueryMolecule&>(*ctlt), ctlt_extNeibs);
static_cast<QueryMolecule&>(*ctlt).getComponentNeighbors(ctlt_extNeibs);
}
ctlt_aam.copy(ctlt->reaction_atom_mapping);

Expand Down Expand Up @@ -221,7 +202,7 @@ void RSmilesLoader::_loadReaction()
{
prod = std::make_unique<QueryMolecule>();
p_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*prod));
_createComponenetsExternalNeighbors(static_cast<QueryMolecule&>(*prod), prod_extNeibs);
static_cast<QueryMolecule&>(*prod).getComponentNeighbors(prod_extNeibs);
}
prod_aam.copy(prod->reaction_atom_mapping);

Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/reaction/src/rsmiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void RSmilesSaver::_saveReaction()
_writeMolecule(i);
}

if (chemaxon)
if (chemaxon && !smarts_mode)
{
_comma = false;
_writeFragmentsInfo();
Expand Down
18 changes: 18 additions & 0 deletions core/indigo-core/tests/tests/formats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,3 +323,21 @@ M END
saver.saveMolecule(t_mol);
ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0);
}

TEST_F(IndigoCoreFormatsTest, smarts_load_save)
{
QueryMolecule q_mol;

std::string smarts_in{"([#8].[#6]).([#6].[#8])"};
BufferScanner scanner(smarts_in.c_str());
SmilesLoader loader(scanner);
loader.smarts_mode = true;
loader.loadQueryMolecule(q_mol);
Array<char> out;
ArrayOutput std_out(out);
SmilesSaver saver(std_out);
saver.smarts_mode = true;
saver.saveQueryMolecule(q_mol);
std::string smarts_out{out.ptr(), static_cast<std::size_t>(out.size())};
ASSERT_EQ(smarts_in, smarts_out);
}
11 changes: 10 additions & 1 deletion core/indigo-core/tests/tests/reaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <base_cpp/output.h>
#include <reaction/reaction.h>
#include <reaction/rsmiles_saver.h>

#include "common.h"

Expand Down Expand Up @@ -65,7 +66,7 @@ TEST_F(IndigoCoreReactionTest, aliases_complex)
QueryReaction reaction;
loadQueryReaction("[#6:1]=[#6:2][#6:3].[#6:4]=[#6:5][#6:6]>>[#6:3][#6:2]=[#6:5][#6:6] |$;;R1;;;R2;R1;;;R2$|", reaction);
reaction.clearAAM();
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |$;;R1;;;R2;R1;;;R2$|", saveReactionSmiles(reaction, true).c_str());
ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", saveReactionSmiles(reaction, true).c_str());
ASSERT_STREQ("$RXN\n\n -INDIGO- 0100000000\n\n 2 1\n$MOL\n\n -INDIGO-01000000002D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 "
"0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 0.0000 "
" 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0 0 0 0\n 2 3 1 0 0 0 0\nA 3\nR1\nM END\n$MOL\n\n "
Expand Down Expand Up @@ -99,4 +100,12 @@ TEST_F(IndigoCoreReactionTest, smarts_reaction)
loadQueryReaction(smarts_in.c_str(), qr);
ASSERT_EQ(qr.reactantsCount(), 1);
ASSERT_EQ(qr.productsCount(), 1);
Array<char> out;
ArrayOutput std_out(out);
RSmilesSaver saver(std_out);
saver.smarts_mode = true;
saver.saveQueryReaction(qr);
out.push(0);
std::string smarts_out{out.ptr()};
ASSERT_EQ(smarts_in, smarts_out);
}
Loading