From 58c046fd6b72186daaf4ce6a83c6a4800659de3c Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach Date: Thu, 7 Sep 2023 21:20:33 +0300 Subject: [PATCH 01/12] #1254 SMARTS with component-level grouping saved without '()' Initial code changes. --- .../indigo-core/molecule/src/smiles_saver.cpp | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index 1bc2397e7f..fb45f16a19 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -75,7 +75,45 @@ void SmilesSaver::saveQueryMolecule(QueryMolecule& mol) _bmol = &mol; _qmol = &mol; _mol = 0; - _saveMolecule(); + if (smarts_mode) + { + std::unordered_set component_nums; + for (int i = 0; i < _qmol->components.size(); i++) + { + component_nums.insert(i); + } + if (component_nums.size() > 1) + { + // decompose _qmol and save each component separately + std::list> extNeighbors; + // fill extNeighbors + int fragment_count = _qmol->countComponents(extNeighbors); + for (int i = 0; i < fragment_count; ++i) + { + Array mapping; + auto fragment = std::make_unique(); + Filter filt(_qmol->getDecomposition().ptr(), Filter::EQ, i); + fragment->makeSubmolecule(*_qmol, filt, &mapping, 0); + saveQueryMolecule(*fragment); + } + _saveMolecule(); + } + else + { + // _qmol.components contains only one value. + // If this value == 0 - no grouping used + // otherwise grouping used - SMARTS should be in parentheses + if (component_nums.count(0) == 0) + _output.writeChar('('); + _saveMolecule(); + if (component_nums.count(0) == 0) + _output.writeChar(')'); + } + } + else + { + _saveMolecule(); + } } void SmilesSaver::_saveMolecule() @@ -600,7 +638,7 @@ void SmilesSaver::_saveMolecule() } } - if (write_extra_info && chemaxon) + if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS { // Before we write the |...| block (ChemAxon's Extended SMILES), // we must clean up the mess we did with the attachment points From a39f1c74e92f5945a7921e408d5f4fe8f8d5fbd1 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach Date: Fri, 8 Sep 2023 19:19:10 +0300 Subject: [PATCH 02/12] #1254 SMARTS with component-level grouping saved without '()' fix reaction saving, prepare to molecule decomposition in smiles_saver --- .../ref/rsmiles/rsmiles_smarts.py.out | 1 + api/tests/integration/tests/formats/smarts.py | 14 ++++++++++ .../tests/rsmiles/rsmiles_smarts.py | 7 +++-- core/indigo-core/molecule/query_molecule.h | 6 +++++ .../molecule/src/query_molecule.cpp | 27 +++++++++++++++++++ .../indigo-core/molecule/src/smiles_saver.cpp | 8 +++--- .../reaction/src/rsmiles_loader.cpp | 25 +++-------------- .../reaction/src/rsmiles_saver.cpp | 2 +- core/indigo-core/tests/tests/formats.cpp | 20 ++++++++++++++ core/indigo-core/tests/tests/reaction.cpp | 9 +++++++ 10 files changed, 90 insertions(+), 29 deletions(-) diff --git a/api/tests/integration/ref/rsmiles/rsmiles_smarts.py.out b/api/tests/integration/ref/rsmiles/rsmiles_smarts.py.out index f883375dca..5b18061691 100644 --- a/api/tests/integration/ref/rsmiles/rsmiles_smarts.py.out +++ b/api/tests/integration/ref/rsmiles/rsmiles_smarts.py.out @@ -1 +1,2 @@ SMARTS component-level grouping load ok +SMARTS component-level grouping save ok diff --git a/api/tests/integration/tests/formats/smarts.py b/api/tests/integration/tests/formats/smarts.py index d16c49c348..a8139cef3d 100644 --- a/api/tests/integration/tests/formats/smarts.py +++ b/api/tests/integration/tests/formats/smarts.py @@ -16,6 +16,17 @@ def testSmarts(m): print(m.smiles()) +def test_smarts_load_save(smarts_in): + m = indigo.loadSmarts(smarts_in) + smarts_out = m.smarts() + if smarts_in == smarts_out: + print("smarts_in==smarts_out") + else: + print("smarts_in!=smarts_out") + print(" smarts_in=%s", smarts_in) + print("smarts_out=%s", smarts_in) + + molstr = """ Ketcher 11241617102D 1 1.00000 0.00000 0 @@ -81,3 +92,6 @@ def testSmarts(m): print("**** Load and Save as Query with not list ****") m = indigo.loadQueryMolecule(notlist) print(m.smarts()) + +test_smarts_load_save("([#8:1].[#6:2])") +test_smarts_load_save("([#8:1].[#6:2]).([#8:1].[#6:2])") diff --git a/api/tests/integration/tests/rsmiles/rsmiles_smarts.py b/api/tests/integration/tests/rsmiles/rsmiles_smarts.py index 8fd4dcaa75..bab98d62c1 100644 --- a/api/tests/integration/tests/rsmiles/rsmiles_smarts.py +++ b/api/tests/integration/tests/rsmiles/rsmiles_smarts.py @@ -10,8 +10,11 @@ indigo = Indigo() - -rxn1 = indigo.loadReactionSmarts("([#8:1].[#6:2])>>([#8:1].[#6:2])") +smarts_in = "([#8:1].[#6:2])>>([#8:1].[#6:2])" +rxn1 = indigo.loadReactionSmarts(smarts_in) assert rxn1.countReactants() == 1 assert rxn1.countProducts() == 1 print("SMARTS component-level grouping load ok") +smarts_out = rxn1.smarts() +assert smarts_in == smarts_out +print("SMARTS component-level grouping save ok") diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index dc6c51fd39..1eaa2b2696 100644 --- a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -372,6 +372,8 @@ namespace indigo // must belong to different connected components of the target molecule Array components; + std::list>& getComponentNeighbors(); + void invalidateAtom(int index, int mask) override; int getAtomMaxExteralConnectivity(int idx); @@ -397,6 +399,10 @@ namespace indigo PtrArray _atoms; PtrArray _bonds; + + std::list> _component_neighbors; + bool _component_neighbors_valid; + void _calculateComponentNeighbors(); }; } // namespace indigo diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index d32df22ec7..891686f19c 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -2140,3 +2140,30 @@ void QueryMolecule::getQueryAtomLabel(int qa, Array& result) if (it != query_atom_labels.end()) result.readString(it->second.c_str(), true); } + +void QueryMolecule::_calculateComponentNeighbors() +{ + std::unordered_map> componentAtoms; + for (int i = 0; i < components.size(); ++i) + { + int componentId = components[i]; + if (componentId > 0) + { // vertice[i] belongs to component #Id + componentAtoms[componentId].insert(i); + } + } + for (auto elem : componentAtoms) + { + auto atoms = elem.second; + if (atoms.size() > 1) + _component_neighbors.emplace_back(atoms); + } +} + +std::list>& QueryMolecule::getComponentNeighbors() +{ + if(!_component_neighbors_valid) + _calculateComponentNeighbors(); + + return _component_neighbors; +} \ No newline at end of file diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index fb45f16a19..e598c693f8 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -80,13 +80,13 @@ void SmilesSaver::saveQueryMolecule(QueryMolecule& mol) std::unordered_set component_nums; for (int i = 0; i < _qmol->components.size(); i++) { - component_nums.insert(i); + component_nums.insert(_qmol->components[i]); } if (component_nums.size() > 1) { + std::unique_ptr mol = std::make_unique(); // decompose _qmol and save each component separately - std::list> extNeighbors; - // fill extNeighbors + std::list>& extNeighbors = _qmol->getComponentNeighbors(); int fragment_count = _qmol->countComponents(extNeighbors); for (int i = 0; i < fragment_count; ++i) { @@ -94,9 +94,9 @@ void SmilesSaver::saveQueryMolecule(QueryMolecule& mol) auto fragment = std::make_unique(); Filter filt(_qmol->getDecomposition().ptr(), Filter::EQ, i); fragment->makeSubmolecule(*_qmol, filt, &mapping, 0); + mol->mergeWithMolecule(*fragment, 0); saveQueryMolecule(*fragment); } - _saveMolecule(); } else { diff --git a/core/indigo-core/reaction/src/rsmiles_loader.cpp b/core/indigo-core/reaction/src/rsmiles_loader.cpp index b2eed45a2f..8a841271ea 100644 --- a/core/indigo-core/reaction/src/rsmiles_loader.cpp +++ b/core/indigo-core/reaction/src/rsmiles_loader.cpp @@ -75,25 +75,6 @@ void RSmilesLoader::loadQueryReaction(QueryReaction& rxn) _loadReaction(); } -static void _createComponenetsExternalNeighbors(QueryMolecule& qmol, std::list>& externNeib) -{ - std::unordered_map> componentAtoms; - for (int i = 0; i < qmol.components.size(); ++i) - { - int componentId = qmol.components[i]; - if (componentId > 0) - { // vertice[i] belongs to component #Id - componentAtoms[componentId].insert(i); - } - } - for (auto elem : componentAtoms) - { - auto atoms = elem.second; - if (atoms.size() > 1) - externNeib.emplace_back(atoms); - } -} - void RSmilesLoader::_loadReaction() { _brxn->clear(); @@ -147,7 +128,7 @@ void RSmilesLoader::_loadReaction() { rcnt = std::make_unique(); r_loader.loadQueryMolecule(static_cast(*rcnt)); - _createComponenetsExternalNeighbors(static_cast(*rcnt), rcnt_extNeibs); + rcnt_extNeibs = static_cast(*rcnt).getComponentNeighbors(); } rcnt_aam.copy(rcnt->reaction_atom_mapping); @@ -181,7 +162,7 @@ void RSmilesLoader::_loadReaction() { ctlt = std::make_unique(); c_loader.loadQueryMolecule(static_cast(*ctlt)); - _createComponenetsExternalNeighbors(static_cast(*ctlt), ctlt_extNeibs); + ctlt_extNeibs = static_cast(*ctlt).getComponentNeighbors(); } ctlt_aam.copy(ctlt->reaction_atom_mapping); @@ -221,7 +202,7 @@ void RSmilesLoader::_loadReaction() { prod = std::make_unique(); p_loader.loadQueryMolecule(static_cast(*prod)); - _createComponenetsExternalNeighbors(static_cast(*prod), prod_extNeibs); + prod_extNeibs = static_cast(*prod).getComponentNeighbors(); } prod_aam.copy(prod->reaction_atom_mapping); diff --git a/core/indigo-core/reaction/src/rsmiles_saver.cpp b/core/indigo-core/reaction/src/rsmiles_saver.cpp index cafcc18f54..a103eafd79 100644 --- a/core/indigo-core/reaction/src/rsmiles_saver.cpp +++ b/core/indigo-core/reaction/src/rsmiles_saver.cpp @@ -146,7 +146,7 @@ void RSmilesSaver::_saveReaction() _writeMolecule(i); } - if (chemaxon) + if (chemaxon && !smarts_mode) { _comma = false; _writeFragmentsInfo(); diff --git a/core/indigo-core/tests/tests/formats.cpp b/core/indigo-core/tests/tests/formats.cpp index 9d4c960daa..fede82cafc 100644 --- a/core/indigo-core/tests/tests/formats.cpp +++ b/core/indigo-core/tests/tests/formats.cpp @@ -323,3 +323,23 @@ M END saver.saveMolecule(t_mol); ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0); } + +TEST_F(IndigoCoreFormatsTest, smarts_load_save) +{ + QueryMolecule q_mol; + + //std::string smarts_in{"([#8:1].[#6:2])"}; + std::string smarts_in{"([#8:1].[#6:2]).([#8:1].[#6:2])"}; + BufferScanner scanner(smarts_in.c_str()); + SmilesLoader loader(scanner); + loader.smarts_mode = true; + loader.loadQueryMolecule(q_mol); + Array out; + ArrayOutput std_out(out); + SmilesSaver saver(std_out); + saver.smarts_mode = true; + saver.saveQueryMolecule(q_mol); + std::string smarts_out{out.ptr(), static_cast(out.size())}; + printf("smart_in=%s\nsmart_out=%s\n", smarts_in, smarts_out); +} + diff --git a/core/indigo-core/tests/tests/reaction.cpp b/core/indigo-core/tests/tests/reaction.cpp index 41b378f4bc..089f4b8965 100644 --- a/core/indigo-core/tests/tests/reaction.cpp +++ b/core/indigo-core/tests/tests/reaction.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "common.h" @@ -99,4 +100,12 @@ TEST_F(IndigoCoreReactionTest, smarts_reaction) loadQueryReaction(smarts_in.c_str(), qr); ASSERT_EQ(qr.reactantsCount(), 1); ASSERT_EQ(qr.productsCount(), 1); + Array out; + ArrayOutput std_out(out); + RSmilesSaver saver(std_out); + saver.smarts_mode = true; + saver.saveQueryReaction(qr); + out.push(0); + std::string smarts_out{out.ptr()}; + ASSERT_EQ(smarts_in, smarts_out); } \ No newline at end of file From d0bc57fa209f367e632fcddd0e080f603e8242cb Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach Date: Mon, 11 Sep 2023 21:15:33 +0300 Subject: [PATCH 03/12] #1254 SMARTS with component-level grouping saved without '()' Add '()' save support. Add UTs. --- .../indigo-core/molecule/src/smiles_saver.cpp | 102 +++++++++++------- core/indigo-core/tests/tests/formats.cpp | 4 +- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index e598c693f8..ca72040e10 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -75,45 +75,7 @@ void SmilesSaver::saveQueryMolecule(QueryMolecule& mol) _bmol = &mol; _qmol = &mol; _mol = 0; - if (smarts_mode) - { - std::unordered_set component_nums; - for (int i = 0; i < _qmol->components.size(); i++) - { - component_nums.insert(_qmol->components[i]); - } - if (component_nums.size() > 1) - { - std::unique_ptr mol = std::make_unique(); - // decompose _qmol and save each component separately - std::list>& extNeighbors = _qmol->getComponentNeighbors(); - int fragment_count = _qmol->countComponents(extNeighbors); - for (int i = 0; i < fragment_count; ++i) - { - Array mapping; - auto fragment = std::make_unique(); - Filter filt(_qmol->getDecomposition().ptr(), Filter::EQ, i); - fragment->makeSubmolecule(*_qmol, filt, &mapping, 0); - mol->mergeWithMolecule(*fragment, 0); - saveQueryMolecule(*fragment); - } - } - else - { - // _qmol.components contains only one value. - // If this value == 0 - no grouping used - // otherwise grouping used - SMARTS should be in parentheses - if (component_nums.count(0) == 0) - _output.writeChar('('); - _saveMolecule(); - if (component_nums.count(0) == 0) - _output.writeChar(')'); - } - } - else - { - _saveMolecule(); - } + _saveMolecule(); } void SmilesSaver::_saveMolecule() @@ -223,6 +185,47 @@ void SmilesSaver::_saveMolecule() walk.walk(); const Array& v_seq = walk.getSequence(); + Array v_to_comp_group; + v_to_comp_group.resize(v_seq.size()); + v_to_comp_group.fffill(); + + if (_qmol != nullptr && smarts_mode) + { + if (v_seq.size() < 1) + return; // No atoms to save + std::set components; + int cur_component = -1; + for (int i = 0; i < v_seq.size(); ++i) + { + // In v_seq each fragment started with vertex which parent == -1 + // In SMARTS some fragments could be grouped (component-level grouping) + // In QueryMolecule group number stored in "".components" member. GroupId == 0 means no group defined. + // Each fragment - connected graph, so all vertexes should belong to one group. + // All group fragments should go one by one - in SMARTS its inside "()". + if (v_seq[i].parent_vertex < 0) // New Fragment + { + int new_component = _qmol->components[v_seq[i].idx]; + // if component defined for new fragment(id>0) and its different from previous and seen before + if (new_component > 0 && new_component != cur_component && components.count(new_component)) + { + // According to the DfsWalk code, the groups components should be neighbors. + // If will be found case when it wrong - add code to rearrange fragments + throw Error("SMARTS fragments need to reaarange."); + } + components.emplace(new_component); + cur_component = new_component; + } + else + { + if (cur_component != _qmol->components[v_seq[i].idx]) + { + // Fragment contains atoms from different components - something went wrong + throw Error("Fragment contains atoms from different components."); + } + } + v_to_comp_group[i] = cur_component; + } + } // fill up neighbor lists for the stereocenters calculation for (i = 0; i < v_seq.size(); i++) @@ -563,8 +566,25 @@ void SmilesSaver::_saveMolecule() else { if (!first_component) + { + // group == 0 means no group set. + int prev_group = v_to_comp_group[i - 1]; + int new_group = v_to_comp_group[i]; + bool different_groups = new_group != prev_group; + if (smarts_mode && prev_group && different_groups) // if component group ended + _output.writeChar(')'); + _output.writeChar('.'); - first_component = false; + + if (smarts_mode && new_group && different_groups) // if new group started + _output.writeChar('('); + } + else + { + if (smarts_mode && v_to_comp_group[i] > 0) // component level grouping set for this fragment + _output.writeChar('('); + first_component = false; + } _written_components++; } if (write_atom) @@ -637,6 +657,8 @@ void SmilesSaver::_saveMolecule() _output.writeString("{+n}"); } } + if (smarts_mode && v_to_comp_group[i - 1]) // if group set for last fragment - add finish ) + _output.writeChar(')'); if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS { diff --git a/core/indigo-core/tests/tests/formats.cpp b/core/indigo-core/tests/tests/formats.cpp index fede82cafc..4b35d62915 100644 --- a/core/indigo-core/tests/tests/formats.cpp +++ b/core/indigo-core/tests/tests/formats.cpp @@ -329,7 +329,7 @@ TEST_F(IndigoCoreFormatsTest, smarts_load_save) QueryMolecule q_mol; //std::string smarts_in{"([#8:1].[#6:2])"}; - std::string smarts_in{"([#8:1].[#6:2]).([#8:1].[#6:2])"}; + std::string smarts_in{"([#8].[#6]).([#6].[#8])"}; BufferScanner scanner(smarts_in.c_str()); SmilesLoader loader(scanner); loader.smarts_mode = true; @@ -340,6 +340,6 @@ TEST_F(IndigoCoreFormatsTest, smarts_load_save) saver.smarts_mode = true; saver.saveQueryMolecule(q_mol); std::string smarts_out{out.ptr(), static_cast(out.size())}; - printf("smart_in=%s\nsmart_out=%s\n", smarts_in, smarts_out); + ASSERT_EQ(smarts_in, smarts_out); } From d1abed00dc11ce1ec4a05d6ce28e326d9fa8e121 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 12 Sep 2023 00:07:25 +0300 Subject: [PATCH 04/12] #1254 SMARTS with component-level grouping saved without '()' Fix format --- core/indigo-core/molecule/src/query_molecule.cpp | 2 +- core/indigo-core/tests/tests/formats.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index 891686f19c..4b0963007d 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -2162,7 +2162,7 @@ void QueryMolecule::_calculateComponentNeighbors() std::list>& QueryMolecule::getComponentNeighbors() { - if(!_component_neighbors_valid) + if (!_component_neighbors_valid) _calculateComponentNeighbors(); return _component_neighbors; diff --git a/core/indigo-core/tests/tests/formats.cpp b/core/indigo-core/tests/tests/formats.cpp index 4b35d62915..b06cb89372 100644 --- a/core/indigo-core/tests/tests/formats.cpp +++ b/core/indigo-core/tests/tests/formats.cpp @@ -328,7 +328,6 @@ TEST_F(IndigoCoreFormatsTest, smarts_load_save) { QueryMolecule q_mol; - //std::string smarts_in{"([#8:1].[#6:2])"}; std::string smarts_in{"([#8].[#6]).([#6].[#8])"}; BufferScanner scanner(smarts_in.c_str()); SmilesLoader loader(scanner); @@ -342,4 +341,3 @@ TEST_F(IndigoCoreFormatsTest, smarts_load_save) std::string smarts_out{out.ptr(), static_cast(out.size())}; ASSERT_EQ(smarts_in, smarts_out); } - From b9a9c45a6f5823efe23cc1d68e860504e97f3187 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 12 Sep 2023 03:03:50 +0300 Subject: [PATCH 05/12] #1254 SMARTS with component-level grouping saved without '()' Fix UTs --- api/tests/integration/ref/formats/smarts.py.out | 3 +++ api/tests/integration/tests/formats/smarts.py | 9 +++++---- core/indigo-core/molecule/src/smiles_saver.cpp | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/api/tests/integration/ref/formats/smarts.py.out b/api/tests/integration/ref/formats/smarts.py.out index 605a3dc506..c61ce65b05 100644 --- a/api/tests/integration/ref/formats/smarts.py.out +++ b/api/tests/integration/ref/formats/smarts.py.out @@ -6,3 +6,6 @@ CC[C+5]CCCCC CC[C+5]CCCCC **** Load and Save as Query with not list **** [#6]-[#6]-[!#5!#6!#7]-[#6]-[#6]-[#6]-[#6]-[#6]-[#6] +**** Load and Save as Query with component-level grouping **** +([#8].[#6]) is ok. smarts_in==smarts_out +([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out diff --git a/api/tests/integration/tests/formats/smarts.py b/api/tests/integration/tests/formats/smarts.py index a8139cef3d..6d4c4b039a 100644 --- a/api/tests/integration/tests/formats/smarts.py +++ b/api/tests/integration/tests/formats/smarts.py @@ -20,11 +20,11 @@ def test_smarts_load_save(smarts_in): m = indigo.loadSmarts(smarts_in) smarts_out = m.smarts() if smarts_in == smarts_out: - print("smarts_in==smarts_out") + print("%s is ok. smarts_in==smarts_out" % smarts_in) else: print("smarts_in!=smarts_out") print(" smarts_in=%s", smarts_in) - print("smarts_out=%s", smarts_in) + print("smarts_out=%s", smarts_out) molstr = """ @@ -93,5 +93,6 @@ def test_smarts_load_save(smarts_in): m = indigo.loadQueryMolecule(notlist) print(m.smarts()) -test_smarts_load_save("([#8:1].[#6:2])") -test_smarts_load_save("([#8:1].[#6:2]).([#8:1].[#6:2])") +print("**** Load and Save as Query with component-level grouping ****") +test_smarts_load_save("([#8].[#6])") +test_smarts_load_save("([#8].[#6]).([#8].[#6])") diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index ca72040e10..c91ffc0af4 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -189,7 +189,7 @@ void SmilesSaver::_saveMolecule() v_to_comp_group.resize(v_seq.size()); v_to_comp_group.fffill(); - if (_qmol != nullptr && smarts_mode) + if (_qmol != nullptr && smarts_mode && _qmol->components.size() >= v_seq.size()) { if (v_seq.size() < 1) return; // No atoms to save @@ -657,7 +657,7 @@ void SmilesSaver::_saveMolecule() _output.writeString("{+n}"); } } - if (smarts_mode && v_to_comp_group[i - 1]) // if group set for last fragment - add finish ) + if (smarts_mode && v_to_comp_group[i - 1] > 0) // if group set for last fragment - add finish ) _output.writeChar(')'); if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS From 0457569f23cbc8fa627e8f04dbaeb80adafe4a16 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 12 Sep 2023 11:57:17 +0300 Subject: [PATCH 06/12] #1254 SMARTS with component-level grouping saved without '()' Fix UTs --- .../integration/tests/rsmiles/rsmiles_smarts.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/api/tests/integration/tests/rsmiles/rsmiles_smarts.py b/api/tests/integration/tests/rsmiles/rsmiles_smarts.py index bab98d62c1..f564e5c536 100644 --- a/api/tests/integration/tests/rsmiles/rsmiles_smarts.py +++ b/api/tests/integration/tests/rsmiles/rsmiles_smarts.py @@ -12,9 +12,16 @@ smarts_in = "([#8:1].[#6:2])>>([#8:1].[#6:2])" rxn1 = indigo.loadReactionSmarts(smarts_in) -assert rxn1.countReactants() == 1 -assert rxn1.countProducts() == 1 -print("SMARTS component-level grouping load ok") +if rxn1.countReactants() == 1 and rxn1.countProducts() == 1: + print("SMARTS component-level grouping load ok") +else: + print("SMARTS component-level grouping load failed") + print("rxn1.countReactants()=%s" % rxn1.countReactants()) + print("rxn1.countProducts()=%s" % rxn1.countProducts()) smarts_out = rxn1.smarts() -assert smarts_in == smarts_out -print("SMARTS component-level grouping save ok") +if smarts_in == smarts_out: + print("SMARTS component-level grouping save ok") +else: + print("SMARTS component-level grouping save failed") + print("smart_in=%s" % smarts_in) + print("smart_ou=%s" % smarts_out) From 6cb8611537db35cabd89350f8b483c5a4d319fe3 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 12 Sep 2023 19:26:53 +0300 Subject: [PATCH 07/12] #1254 SMARTS with component-level grouping saved without '()' Fix UTs --- core/indigo-core/molecule/src/query_molecule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index 4b0963007d..dfce5ee25b 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -26,7 +26,7 @@ using namespace indigo; -QueryMolecule::QueryMolecule() : spatial_constraints(*this) +QueryMolecule::QueryMolecule() : spatial_constraints(*this), _component_neighbors_valid(false) { } From 721450e4016d11fb2bfaa62fd5997e3406353504 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Wed, 13 Sep 2023 01:45:56 +0300 Subject: [PATCH 08/12] #1254 SMARTS with component-level grouping saved without '()' Fix UTs --- core/indigo-core/molecule/query_molecule.h | 1 - core/indigo-core/molecule/src/query_molecule.cpp | 6 ++---- core/indigo-core/tests/tests/reaction.cpp | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index 1eaa2b2696..6bf0ff017b 100644 --- a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -401,7 +401,6 @@ namespace indigo PtrArray _bonds; std::list> _component_neighbors; - bool _component_neighbors_valid; void _calculateComponentNeighbors(); }; diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index dfce5ee25b..d72007611c 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -26,7 +26,7 @@ using namespace indigo; -QueryMolecule::QueryMolecule() : spatial_constraints(*this), _component_neighbors_valid(false) +QueryMolecule::QueryMolecule() : spatial_constraints(*this) { } @@ -2162,8 +2162,6 @@ void QueryMolecule::_calculateComponentNeighbors() std::list>& QueryMolecule::getComponentNeighbors() { - if (!_component_neighbors_valid) - _calculateComponentNeighbors(); - + _calculateComponentNeighbors(); return _component_neighbors; } \ No newline at end of file diff --git a/core/indigo-core/tests/tests/reaction.cpp b/core/indigo-core/tests/tests/reaction.cpp index 089f4b8965..81c5320ab1 100644 --- a/core/indigo-core/tests/tests/reaction.cpp +++ b/core/indigo-core/tests/tests/reaction.cpp @@ -66,7 +66,7 @@ TEST_F(IndigoCoreReactionTest, aliases_complex) QueryReaction reaction; loadQueryReaction("[#6:1]=[#6:2][#6:3].[#6:4]=[#6:5][#6:6]>>[#6:3][#6:2]=[#6:5][#6:6] |$;;R1;;;R2;R1;;;R2$|", reaction); reaction.clearAAM(); - ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |$;;R1;;;R2;R1;;;R2$|", saveReactionSmiles(reaction, true).c_str()); + ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", saveReactionSmiles(reaction, true).c_str()); ASSERT_STREQ("$RXN\n\n -INDIGO- 0100000000\n\n 2 1\n$MOL\n\n -INDIGO-01000000002D\n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 " "0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 0.0000 " " 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0 0 0 0\n 2 3 1 0 0 0 0\nA 3\nR1\nM END\n$MOL\n\n " From 51c01670348cfc0a7f4c5846890e25737703e644 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Wed, 13 Sep 2023 02:23:31 +0300 Subject: [PATCH 09/12] #1254 SMARTS with component-level grouping saved without '()' Fix UTs --- api/cpp/tests/basic/reaction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/cpp/tests/basic/reaction.cpp b/api/cpp/tests/basic/reaction.cpp index d046228d65..cd1b21e78c 100644 --- a/api/cpp/tests/basic/reaction.cpp +++ b/api/cpp/tests/basic/reaction.cpp @@ -29,7 +29,7 @@ TEST(Reaction, automapReactionSmarts) const auto& session = IndigoSession::create(); auto reaction = session->loadReaction("[C:1]=[C:2][C:3].[C:4]=[C:5][C:6]>>[C:3][C:2]=[C:5][C:6]"); reaction.automap(IndigoAutomapMode::CLEAR); - ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |^3:0,3,^1:1,4,7,8|", reaction.smarts().c_str()); + ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", reaction.smarts().c_str()); } TEST(Reaction, automapQueryReactionSmarts) @@ -37,5 +37,5 @@ TEST(Reaction, automapQueryReactionSmarts) const auto& session = IndigoSession::create(); auto reaction = session->loadQueryReaction("[C:1]=[C:2][C:3].[C:4]=[C:5][C:6]>>[C:3][C:2]=[C:5][C:6] |$;;R1;;;R2;R1;;;R2$|"); reaction.automap(IndigoAutomapMode::CLEAR); - ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6] |$;;R1;;;R2;R1;;;R2$|", reaction.smarts().c_str()); + ASSERT_STREQ("[#6]=[#6]-[#6].[#6]=[#6]-[#6]>>[#6]-[#6]=[#6]-[#6]", reaction.smarts().c_str()); } From b95b6ab9de7303ad7d4cc35f1ba42cf9df938d8f Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Wed, 13 Sep 2023 18:20:28 +0300 Subject: [PATCH 10/12] #1254 SMARTS with component-level grouping saved without '()' Fix UTs --- core/indigo-core/molecule/query_molecule.h | 5 +---- core/indigo-core/molecule/src/query_molecule.cpp | 11 +++-------- core/indigo-core/reaction/src/rsmiles_loader.cpp | 6 +++--- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index 6bf0ff017b..5fc637f9f4 100644 --- a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -372,7 +372,7 @@ namespace indigo // must belong to different connected components of the target molecule Array components; - std::list>& getComponentNeighbors(); + void getComponentNeighbors(std::list> &componentNeighbors); void invalidateAtom(int index, int mask) override; @@ -399,9 +399,6 @@ namespace indigo PtrArray _atoms; PtrArray _bonds; - - std::list> _component_neighbors; - void _calculateComponentNeighbors(); }; } // namespace indigo diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index d72007611c..404c892a15 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -2141,7 +2141,7 @@ void QueryMolecule::getQueryAtomLabel(int qa, Array& result) result.readString(it->second.c_str(), true); } -void QueryMolecule::_calculateComponentNeighbors() +void QueryMolecule::getComponentNeighbors(std::list> &componentNeighbors) { std::unordered_map> componentAtoms; for (int i = 0; i < components.size(); ++i) @@ -2152,16 +2152,11 @@ void QueryMolecule::_calculateComponentNeighbors() componentAtoms[componentId].insert(i); } } + componentNeighbors.clear(); for (auto elem : componentAtoms) { auto atoms = elem.second; if (atoms.size() > 1) - _component_neighbors.emplace_back(atoms); + componentNeighbors.emplace_back(atoms); } -} - -std::list>& QueryMolecule::getComponentNeighbors() -{ - _calculateComponentNeighbors(); - return _component_neighbors; } \ No newline at end of file diff --git a/core/indigo-core/reaction/src/rsmiles_loader.cpp b/core/indigo-core/reaction/src/rsmiles_loader.cpp index 8a841271ea..40b695ccaa 100644 --- a/core/indigo-core/reaction/src/rsmiles_loader.cpp +++ b/core/indigo-core/reaction/src/rsmiles_loader.cpp @@ -128,7 +128,7 @@ void RSmilesLoader::_loadReaction() { rcnt = std::make_unique(); r_loader.loadQueryMolecule(static_cast(*rcnt)); - rcnt_extNeibs = static_cast(*rcnt).getComponentNeighbors(); + static_cast(*rcnt).getComponentNeighbors(rcnt_extNeibs); } rcnt_aam.copy(rcnt->reaction_atom_mapping); @@ -162,7 +162,7 @@ void RSmilesLoader::_loadReaction() { ctlt = std::make_unique(); c_loader.loadQueryMolecule(static_cast(*ctlt)); - ctlt_extNeibs = static_cast(*ctlt).getComponentNeighbors(); + static_cast(*ctlt).getComponentNeighbors(ctlt_extNeibs); } ctlt_aam.copy(ctlt->reaction_atom_mapping); @@ -202,7 +202,7 @@ void RSmilesLoader::_loadReaction() { prod = std::make_unique(); p_loader.loadQueryMolecule(static_cast(*prod)); - prod_extNeibs = static_cast(*prod).getComponentNeighbors(); + static_cast(*prod).getComponentNeighbors(prod_extNeibs); } prod_aam.copy(prod->reaction_atom_mapping); From 2724b9ee198545b501bc832b14d41ea77a35996f Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Wed, 13 Sep 2023 18:30:10 +0300 Subject: [PATCH 11/12] #1254 SMARTS with component-level grouping saved without '()' Fix formating. --- core/indigo-core/molecule/src/query_molecule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index 404c892a15..6a46a3175c 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -2141,7 +2141,7 @@ void QueryMolecule::getQueryAtomLabel(int qa, Array& result) result.readString(it->second.c_str(), true); } -void QueryMolecule::getComponentNeighbors(std::list> &componentNeighbors) +void QueryMolecule::getComponentNeighbors(std::list>& componentNeighbors) { std::unordered_map> componentAtoms; for (int i = 0; i < components.size(); ++i) From f2af479f5ca69ceb4ea77dd86e7d42679d7e6b6c Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Wed, 13 Sep 2023 19:34:04 +0300 Subject: [PATCH 12/12] #1254 SMARTS with component-level grouping saved without '()' Fix formating --- core/indigo-core/molecule/query_molecule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index 5fc637f9f4..cd98fa75fd 100644 --- a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -372,7 +372,7 @@ namespace indigo // must belong to different connected components of the target molecule Array components; - void getComponentNeighbors(std::list> &componentNeighbors); + void getComponentNeighbors(std::list>& componentNeighbors); void invalidateAtom(int index, int mask) override;