From 18bd5dc4f4a43555a1995befd5b69c6911120213 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 8 Aug 2023 04:15:50 +0300 Subject: [PATCH] #1200 MolfileSaver add Data S-Groups to molecule at saveMolecule() (#1208) --- .../integration/ref/deco/deco_sdf.py.out | 4 +- core/indigo-core/molecule/molecule_sgroups.h | 6 ++ .../molecule/src/molecule_sgroups.cpp | 17 +++++ .../molecule/src/molfile_loader.cpp | 6 +- .../molecule/src/molfile_saver.cpp | 69 +++++++++++-------- core/indigo-core/tests/tests/formats.cpp | 14 ++-- .../tests/tests/structure_checker.cpp | 2 +- 7 files changed, 77 insertions(+), 41 deletions(-) diff --git a/api/tests/integration/ref/deco/deco_sdf.py.out b/api/tests/integration/ref/deco/deco_sdf.py.out index 7ad2e864f8..64e2edbada 100644 --- a/api/tests/integration/ref/deco/deco_sdf.py.out +++ b/api/tests/integration/ref/deco/deco_sdf.py.out @@ -29762,7 +29762,7 @@ $END MOL mapped scaffold: OCC1OC(O)C(O)C(O)C1O RGROUP #1 - fragment #0: c12c(Cl)c(ccc1[nH]cc%912)Br.[*:1]%91 |$;;;;;;;;;;;_AP1$,SgD:5:MRV_IMPLICIT_H:IMPL_H1::: :| + fragment #0: c12c(Cl)c(ccc1[nH]cc%912)Br.[*:1]%91 |$;;;;;;;;;;;_AP1$| C1(COP(=O)(O)O)C(O)C(C(O)C(OP(=O)(O)O)O1)O.C1CCCCC1N.C1CCCCC1N.C1CCCCC1N.C1CCCCC1N.O |ha:0,1,2,7,8,9,10,11,12,13,18,19,hb:0,1,6,7,8,9,10,11,12,17,18,19| decomposed molecule: C1(CO%91)C(O)C(C(O)C(O%92)O1)O.[*:3]%91.[*:1]%92 |$;;;;;;;;;;;;_R3;_R1$,RG:_R1={P%91(=O)(O)O.[*:1]%91 |$;;;;_AP1$|},_R3={P%91(=O)(O)O.[*:1]%91 |$;;;;_AP1$|}| decomposed molecule: $MDL REV 1 0100000000 @@ -48077,7 +48077,7 @@ $END MOL mapped scaffold: OCC1OC(O)C(O)C(O)C1O RGROUP #1 - fragment #0: c1%91c[nH]c2ccc(Br)c(Cl)c12.[*:1]%91 |$;;;;;;;;;;;_AP1$,SgD:5:MRV_IMPLICIT_H:IMPL_H1::: :| + fragment #0: c1%91c[nH]c2ccc(Br)c(Cl)c12.[*:1]%91 |$;;;;;;;;;;;_AP1$| RGROUP #2 fragment #0: C1%91C(O)C(C(OC2C(O)C(C(O)C(CO)O2)O)C(CO)O1)O.[*:1]%91 |$;;;;;;;;;;;;;;;;;;;;;;_AP1$| C1(CO)(OC(C(O)C1OC1C(O)C(C(O)C(CO)O1)O)CO)OC1C(O)C(C(O)C(CO)O1)O.O |ha:22,23,24,25,26,27,28,29,30,31,32,33,hb:24,25,26,27,28,29,30,31,32,33,34,35| diff --git a/core/indigo-core/molecule/molecule_sgroups.h b/core/indigo-core/molecule/molecule_sgroups.h index b6ed67d974..df7c75811a 100644 --- a/core/indigo-core/molecule/molecule_sgroups.h +++ b/core/indigo-core/molecule/molecule_sgroups.h @@ -148,6 +148,12 @@ namespace indigo int num_chars; // number of characters int dasp_pos; char tag; // tag + static constexpr char mrv_implicit_h[] = "MRV_IMPLICIT_H"; + static constexpr char impl_prefix[] = "IMPL_H"; + static constexpr size_t impl_prefix_len = sizeof(impl_prefix) - 1; + bool isMrv_implicit(); + void setMrv_implicit(int atom_idx, int hydrogens_count); + private: DataSGroup(const DataSGroup&); }; diff --git a/core/indigo-core/molecule/src/molecule_sgroups.cpp b/core/indigo-core/molecule/src/molecule_sgroups.cpp index 07a30ce4dc..50b12db1af 100644 --- a/core/indigo-core/molecule/src/molecule_sgroups.cpp +++ b/core/indigo-core/molecule/src/molecule_sgroups.cpp @@ -82,6 +82,23 @@ DataSGroup::~DataSGroup() { } +constexpr char DataSGroup::mrv_implicit_h[]; +constexpr char DataSGroup::impl_prefix[]; + +bool DataSGroup::isMrv_implicit() +{ + return name.size() == sizeof(mrv_implicit_h) && strncmp(name.ptr(), mrv_implicit_h, name.size()) == 0; +} + +void DataSGroup::setMrv_implicit(int atom_idx, int hydrogens_count) +{ + atoms.push(atom_idx); + std::string sdata = impl_prefix + std::to_string(hydrogens_count); + data.readString(sdata.c_str(), true); + name.readString(mrv_implicit_h, true); + detached = true; +} + Superatom::Superatom() { sgroup_type = SGroup::SG_TYPE_SUP; diff --git a/core/indigo-core/molecule/src/molfile_loader.cpp b/core/indigo-core/molecule/src/molfile_loader.cpp index 8e7182907a..5f454c9d32 100644 --- a/core/indigo-core/molecule/src/molfile_loader.cpp +++ b/core/indigo-core/molecule/src/molfile_loader.cpp @@ -1939,11 +1939,11 @@ void MolfileLoader::_postLoad() SGroup& sgroup = _bmol->sgroups.getSGroup(i); if (sgroup.sgroup_type == SGroup::SG_TYPE_DAT) { - DataSGroup& dsg = (DataSGroup&)sgroup; - if (dsg.name.size() > 0 && strncmp(dsg.name.ptr(), "MRV_IMPLICIT_H", 14) == 0) + DataSGroup& dsg = static_cast(sgroup); + if (dsg.isMrv_implicit()) { BufferScanner scanner(dsg.data); - scanner.skip(6); // IMPL_H + scanner.skip(DataSGroup::impl_prefix_len); // IMPL_H int hcount = scanner.readInt1(); int k = dsg.atoms[0]; diff --git a/core/indigo-core/molecule/src/molfile_saver.cpp b/core/indigo-core/molecule/src/molfile_saver.cpp index ab4f093d48..66b3054629 100644 --- a/core/indigo-core/molecule/src/molfile_saver.cpp +++ b/core/indigo-core/molecule/src/molfile_saver.cpp @@ -80,6 +80,32 @@ void convert_xyz_to_string(Vec3f& xyz, std::stringstream& coords) write_c(xyz.z, coords); } +/* + * Remove added implicit sgroups + */ +void _removeImplicitSGroups(BaseMolecule& mol, std::list& implicit_sgroups_indexes) +{ + for (int idx : implicit_sgroups_indexes) + { + SGroup& sg = mol.sgroups.getSGroup(idx); + if (sg.sgroup_type == SGroup::SG_TYPE_DAT) + { + DataSGroup& dsg = static_cast(sg); + if (dsg.isMrv_implicit()) + { + mol.sgroups.remove(idx); + } + else + { + throw MolfileSaver::Error("internal: wanted mrv_implicit sgroup but got other"); + } + } + else + { + throw MolfileSaver::Error("internal: wanted data sgroup but got other"); + } + } +} void MolfileSaver::saveBaseMolecule(BaseMolecule& mol) { _saveMolecule(mol, mol.isQueryMolecule()); @@ -352,6 +378,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) int i; int iw = 1; QS_DEF(Array, buf); + std::list implicit_sgroups_indexes; _atom_mapping.clear_resize(mol.vertexEnd()); _bond_mapping.clear_resize(mol.edgeEnd()); @@ -471,18 +498,9 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) int sg_idx; sg_idx = mol.sgroups.addSGroup(SGroup::SG_TYPE_DAT); - DataSGroup& sgroup = (DataSGroup&)mol.sgroups.getSGroup(sg_idx); - - sgroup.atoms.push(i); - - QS_DEF(Array, tmp_buf); - ArrayOutput tmp_out(tmp_buf); - tmp_out.printf("IMPL_H%d", hcount); - tmp_buf.push(0); - sgroup.data.readString(tmp_buf.ptr(), true); - - sgroup.name.readString("MRV_IMPLICIT_H", true); - sgroup.detached = true; + implicit_sgroups_indexes.push_front(sg_idx); + DataSGroup& sgroup = static_cast(mol.sgroups.getSGroup(sg_idx)); + sgroup.setMrv_implicit(i, hcount); } if (radical > 0) @@ -786,7 +804,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) } else if (sgroup.sgroup_type == SGroup::SG_TYPE_SUP) { - Superatom& sup = (Superatom&)sgroup; + Superatom& sup = static_cast(sgroup); if (sup.bond_connections.size() > 0) { for (int j = 0; j < sup.bond_connections.size(); j++) @@ -827,7 +845,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) } else if (sgroup.sgroup_type == SGroup::SG_TYPE_DAT) { - DataSGroup& dsg = (DataSGroup&)sgroup; + DataSGroup& dsg = static_cast(sgroup); const char* name = dsg.name.ptr(); if (name != 0 && strlen(name) > 0) @@ -904,7 +922,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) } else if (sgroup.sgroup_type == SGroup::SG_TYPE_SRU) { - RepeatingUnit& ru = (RepeatingUnit&)sgroup; + RepeatingUnit& ru = static_cast(sgroup); if (ru.connectivity == SGroup::HEAD_TO_HEAD) out.printf(" CONNECT=HH"); else if (ru.connectivity == SGroup::HEAD_TO_TAIL) @@ -922,7 +940,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) } else if (sgroup.sgroup_type == SGroup::SG_TYPE_MUL) { - MultipleGroup& mg = (MultipleGroup&)sgroup; + MultipleGroup& mg = static_cast(sgroup); if (mg.parent_atoms.size() > 0) { out.printf(" PATOMS=(%d", mg.parent_atoms.size()); @@ -940,6 +958,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) } } output.writeStringCR("M V30 END SGROUP"); + _removeImplicitSGroups(mol, implicit_sgroups_indexes); } output.writeStringCR("M V30 END CTAB"); @@ -1091,6 +1110,7 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) QS_DEF(Array, substitution_count); QS_DEF(Array, ring_bonds); QS_DEF(Array, aliases); + std::list implicit_sgroups_indexes; _atom_mapping.clear_resize(mol.vertexEnd()); _bond_mapping.clear_resize(mol.edgeEnd()); @@ -1282,19 +1302,9 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) int sg_idx; sg_idx = mol.sgroups.addSGroup(SGroup::SG_TYPE_DAT); - DataSGroup& sgroup = (DataSGroup&)mol.sgroups.getSGroup(sg_idx); - - sgroup.atoms.push(i); - - QS_DEF(Array, tmp_buf); - ArrayOutput tmp_out(tmp_buf); - tmp_buf.clear(); - tmp_out.printf("IMPL_H%d", hydrogens_count); - tmp_buf.push(0); - sgroup.data.readString(tmp_buf.ptr(), true); - - sgroup.name.readString("MRV_IMPLICIT_H", true); - sgroup.detached = true; + implicit_sgroups_indexes.push_front(sg_idx); + DataSGroup& sgroup = static_cast(mol.sgroups.getSGroup(sg_idx)); + sgroup.setMrv_implicit(i, hydrogens_count); hydrogens_count = 0; } @@ -1768,6 +1778,7 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) } } } + _removeImplicitSGroups(mol, implicit_sgroups_indexes); } void MolfileSaver::_writeFormattedString(Output& output, Array& str, int length) diff --git a/core/indigo-core/tests/tests/formats.cpp b/core/indigo-core/tests/tests/formats.cpp index d6b0d3efc5..9d4c960daa 100644 --- a/core/indigo-core/tests/tests/formats.cpp +++ b/core/indigo-core/tests/tests/formats.cpp @@ -273,7 +273,7 @@ TEST_F(IndigoCoreFormatsTest, mol_saver_issue_1200) { Molecule t_mol; - loadMolecule(R"( + const char* mol = R"( -INDIGO-07262316452D 6 6 0 0 0 0 0 0 0 0999 V2000 @@ -308,16 +308,18 @@ M SDT 4 MRV_IMPLICIT_H M SDD 4 0.0000 0.0000 DA ALL 1 1 M SED 4 IMPL_H1 M END -)", - t_mol); +)"; + loadMolecule(mol, t_mol); ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0); Array out; ArrayOutput std_out(out); MolfileSaver saver(std_out); saver.saveMolecule(t_mol); - ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 2); + ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0); + saver.mode = MolfileSaver::MODE_2000; saver.saveMolecule(t_mol); - ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 4); + ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0); + saver.mode = MolfileSaver::MODE_3000; saver.saveMolecule(t_mol); - ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 6); + ASSERT_EQ(t_mol.sgroups.getSGroupCount(), 0); } diff --git a/core/indigo-core/tests/tests/structure_checker.cpp b/core/indigo-core/tests/tests/structure_checker.cpp index 0f24460804..75b36ea9cc 100644 --- a/core/indigo-core/tests/tests/structure_checker.cpp +++ b/core/indigo-core/tests/tests/structure_checker.cpp @@ -49,7 +49,7 @@ TEST_F(IndigoCoreStructureCheckTest, radical) TEST_F(IndigoCoreStructureCheckTest, issue731_stereo) { Molecule molecule; - char* mol = R"({"root":{"nodes":[{"$ref":"rg2"}]}, + const char* mol = R"({"root":{"nodes":[{"$ref":"rg2"}]}, "header":{"moleculeName":"null"},"rg2":{"rlogic":{"number":2},"type":"rgroup", "atoms":[{"label":"C","location":[14.808632653403645,17.2218585385598,0]}, {"label":"C","location":[13.997755777985581,16.696221303012916,0]},