From a333f0fa2683626b912da550ce1355921a5c0bdc Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:12:51 +0300 Subject: [PATCH] #1296 Add "customQuery" option support to ket format (#1297) --- .../ref/formats/ket_with_query.py.out | 6 + .../formats/ket_with_query_components.py.out | 2 - .../formats/ket_with_query_properties.py.out | 2 - .../tests/formats/ket_with_query.py | 45 ++++++ .../formats/ket_with_query_components.py | 35 ---- .../formats/ket_with_query_properties.py | 35 ---- .../formats/ref/ket_with_custom_query.ket | 123 +++++++++++++++ .../molecule/molecule_json_saver.h | 4 +- core/indigo-core/molecule/smiles_loader.h | 13 +- core/indigo-core/molecule/smiles_saver.h | 7 +- .../molecule/src/molecule_json_loader.cpp | 12 +- .../molecule/src/molecule_json_saver.cpp | 15 +- .../molecule/src/smiles_loader.cpp | 59 ++++--- .../indigo-core/molecule/src/smiles_saver.cpp | 149 ++++++++++-------- 14 files changed, 329 insertions(+), 178 deletions(-) create mode 100644 api/tests/integration/ref/formats/ket_with_query.py.out delete mode 100644 api/tests/integration/ref/formats/ket_with_query_components.py.out delete mode 100644 api/tests/integration/ref/formats/ket_with_query_properties.py.out create mode 100644 api/tests/integration/tests/formats/ket_with_query.py delete mode 100644 api/tests/integration/tests/formats/ket_with_query_components.py delete mode 100644 api/tests/integration/tests/formats/ket_with_query_properties.py create mode 100644 api/tests/integration/tests/formats/ref/ket_with_custom_query.ket diff --git a/api/tests/integration/ref/formats/ket_with_query.py.out b/api/tests/integration/ref/formats/ket_with_query.py.out new file mode 100644 index 0000000000..64ed0ae0d2 --- /dev/null +++ b/api/tests/integration/ref/formats/ket_with_query.py.out @@ -0,0 +1,6 @@ +*** KET with query components *** +ket_with_query_components.ket:SUCCEED +*** KET with query properties *** +ket_with_query_properties.ket:SUCCEED +*** KET with custom query *** +ket_with_custom_query.ket:SUCCEED diff --git a/api/tests/integration/ref/formats/ket_with_query_components.py.out b/api/tests/integration/ref/formats/ket_with_query_components.py.out deleted file mode 100644 index 78b56b42bc..0000000000 --- a/api/tests/integration/ref/formats/ket_with_query_components.py.out +++ /dev/null @@ -1,2 +0,0 @@ -*** KET with query components *** -ket_with_query_components.ket:SUCCEED diff --git a/api/tests/integration/ref/formats/ket_with_query_properties.py.out b/api/tests/integration/ref/formats/ket_with_query_properties.py.out deleted file mode 100644 index a9eab280e2..0000000000 --- a/api/tests/integration/ref/formats/ket_with_query_properties.py.out +++ /dev/null @@ -1,2 +0,0 @@ -*** KET with query properties *** -ket_with_query_properties.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/ket_with_query.py b/api/tests/integration/tests/formats/ket_with_query.py new file mode 100644 index 0000000000..32a5838233 --- /dev/null +++ b/api/tests/integration/tests/formats/ket_with_query.py @@ -0,0 +1,45 @@ +import difflib +import os +import sys + + +def find_diff(a, b): + return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) + + +sys.path.append( + os.path.normpath( + os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") + ) +) +from env_indigo import * # noqa + +indigo = Indigo() +indigo.setOption("json-saving-pretty", True) + +ref_path = joinPathPy("ref/", __file__) + + +def check_ket_file(name): + filename = os.path.join(ref_path, name) + + mol = indigo.loadQueryMoleculeFromFile(filename) + with open(filename, "r") as file: + ket_ref = file.read() + ket = mol.json() + diff = find_diff(ket_ref, ket) + if not diff: + print(name + ":SUCCEED") + else: + print(name + ":FAILED") + print(diff) + + +print("*** KET with query components ***") +check_ket_file("ket_with_query_components.ket") + +print("*** KET with query properties ***") +check_ket_file("ket_with_query_properties.ket") + +print("*** KET with custom query ***") +check_ket_file("ket_with_custom_query.ket") diff --git a/api/tests/integration/tests/formats/ket_with_query_components.py b/api/tests/integration/tests/formats/ket_with_query_components.py deleted file mode 100644 index a6ad072a39..0000000000 --- a/api/tests/integration/tests/formats/ket_with_query_components.py +++ /dev/null @@ -1,35 +0,0 @@ -import difflib -import os -import sys - - -def find_diff(a, b): - return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) - - -sys.path.append( - os.path.normpath( - os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") - ) -) -from env_indigo import * # noqa - -indigo = Indigo() -indigo.setOption("json-saving-pretty", True) - -print("*** KET with query components ***") - -ref_path = joinPathPy("ref/", __file__) -name = "ket_with_query_components.ket" -filename = os.path.join(ref_path, name) - -mol = indigo.loadQueryMoleculeFromFile(filename) -with open(filename, "r") as file: - ket_ref = file.read() -ket = mol.json() -diff = find_diff(ket_ref, ket) -if not diff: - print(name + ":SUCCEED") -else: - print(name + ":FAILED") - print(diff) diff --git a/api/tests/integration/tests/formats/ket_with_query_properties.py b/api/tests/integration/tests/formats/ket_with_query_properties.py deleted file mode 100644 index 369ae7c28e..0000000000 --- a/api/tests/integration/tests/formats/ket_with_query_properties.py +++ /dev/null @@ -1,35 +0,0 @@ -import difflib -import os -import sys - - -def find_diff(a, b): - return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) - - -sys.path.append( - os.path.normpath( - os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") - ) -) -from env_indigo import * # noqa - -indigo = Indigo() -indigo.setOption("json-saving-pretty", True) - -print("*** KET with query properties ***") - -ref_path = joinPathPy("ref/", __file__) -name = "ket_with_query_properties.ket" -filename = os.path.join(ref_path, name) - -mol = indigo.loadQueryMoleculeFromFile(filename) -with open(filename, "r") as file: - ket_ref = file.read() -ket = mol.json() -diff = find_diff(ket_ref, ket) -if not diff: - print(name + ":SUCCEED") -else: - print(name + ":FAILED") - print(diff) diff --git a/api/tests/integration/tests/formats/ref/ket_with_custom_query.ket b/api/tests/integration/tests/formats/ref/ket_with_custom_query.ket new file mode 100644 index 0000000000..0f324f83e8 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/ket_with_custom_query.ket @@ -0,0 +1,123 @@ +{ + "root": { + "nodes": [ + { + "$ref": "mol0" + } + ] + }, + "mol0": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 6.3348493576049809, + -5.550074577331543, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 8.06515121459961, + -5.549589157104492, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 7.2016377449035648, + -5.049966812133789, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 8.06515121459961, + -6.55053186416626, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 6.3348493576049809, + -6.555019855499268, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 7.203820705413818, + -7.050033092498779, + 0.0 + ], + "queryProperties": { + "customQuery": "C&X3,N&X2" + } + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 2, + 0 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 3 + ] + } + ], + "sgroups": [ + { + "type": "MUL", + "atoms": [ + 0, + 1, + 2, + 3, + 4, + 5 + ], + "mul": 1 + } + ] + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/molecule_json_saver.h b/core/indigo-core/molecule/molecule_json_saver.h index e462c9b702..3011ebdea7 100644 --- a/core/indigo-core/molecule/molecule_json_saver.h +++ b/core/indigo-core/molecule/molecule_json_saver.h @@ -95,12 +95,12 @@ namespace indigo bool Int64(int64_t i64) { - return pretty_json ? _pretty_writer.Uint(i64) : _writer.Uint(i64); + return pretty_json ? _pretty_writer.Uint64(i64) : _writer.Uint64(i64); } bool Uint64(uint64_t u64) { - return pretty_json ? _pretty_writer.Uint(u64) : _writer.Uint(u64); + return pretty_json ? _pretty_writer.Uint64(u64) : _writer.Uint64(u64); } bool Double(double d) diff --git a/core/indigo-core/molecule/smiles_loader.h b/core/indigo-core/molecule/smiles_loader.h index f80adafec4..c85f8b6e69 100644 --- a/core/indigo-core/molecule/smiles_loader.h +++ b/core/indigo-core/molecule/smiles_loader.h @@ -67,6 +67,9 @@ namespace indigo bool ignore_bad_valence; bool ignore_no_chiral_flag{false}; + static void readSmartsAtomStr(const std::string& atom_str, std::unique_ptr& qatom); + static void readSmartsBondStr(const std::string& bond_str, std::unique_ptr& qbond); + protected: enum { @@ -178,14 +181,16 @@ namespace indigo void _handleCurlyBrace(_AtomDesc& atom, bool& inside_polymer); void _handlePolymerRepetition(int i); - void _readAtom(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom); + static void _readAtom(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom, + bool smarts_mode = false, bool inside_rsmiles = false); - bool _readAtomLogic(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom); + static bool _readAtomLogic(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom, + bool smarts_mode = false, bool inside_rsmiles = false); int _parseCurly(Array& curly, int& repetitions); - void _readBond(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond); - void _readBondSub(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond); + static void _readBond(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond, bool smarts_mode); + static void _readBondSub(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond, bool smarts_mode); void _readRGroupOccurrenceRanges(const char* str, Array& ranges); private: diff --git a/core/indigo-core/molecule/smiles_saver.h b/core/indigo-core/molecule/smiles_saver.h index 65981dff94..e11d10774f 100644 --- a/core/indigo-core/molecule/smiles_saver.h +++ b/core/indigo-core/molecule/smiles_saver.h @@ -80,6 +80,9 @@ namespace indigo const Array& getSavedCisTransParities(); + static std::string writeSmartsAtomStr(QueryMolecule::Atom* atom); + static std::string writeSmartsBondStr(QueryMolecule::Bond* bond); + protected: void _saveMolecule(); @@ -110,8 +113,8 @@ namespace indigo void _writeAtom(int idx, bool aromatic, bool lowercase, int chirality) const; void _writeChirality(int chirality) const; void _writeCharge(int charge) const; - void _writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chirality, int depth, bool has_or_parent, bool has_not_parent) const; - void _writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_or_parent) const; + static void _writeSmartsAtom(Output& output, QueryMolecule::Atom* atom, int aam, int chirality, int depth, bool has_or_parent, bool has_not_parent); + static void _writeSmartsBond(Output& output, QueryMolecule::Bond* bond, bool has_or_parent); void _markCisTrans(); void _banSlashes(); int _calcBondDirection(int idx, int vprev); diff --git a/core/indigo-core/molecule/src/molecule_json_loader.cpp b/core/indigo-core/molecule/src/molecule_json_loader.cpp index 7c7c10a523..8fa7420a2c 100644 --- a/core/indigo-core/molecule/src/molecule_json_loader.cpp +++ b/core/indigo-core/molecule/src/molecule_json_loader.cpp @@ -12,6 +12,7 @@ #include "molecule/molecule.h" #include "molecule/molecule_sgroups.h" #include "molecule/query_molecule.h" +#include "molecule/smiles_loader.h" using namespace rapidjson; using namespace indigo; @@ -524,7 +525,10 @@ void MoleculeJsonLoader::parseAtoms(const rapidjson::Value& atoms, BaseMolecule& auto qProps = a["queryProperties"].GetObject(); if (qProps.HasMember("customQuery")) { - // Read custom query + std::string customQuery = qProps["customQuery"].GetString(); + std::unique_ptr atom = make_unique(); + SmilesLoader::readSmartsAtomStr(customQuery, atom); + _pqmol->resetAtom(atom_idx, atom.release()); } else { @@ -676,7 +680,9 @@ void MoleculeJsonLoader::parseBonds(const rapidjson::Value& bonds, BaseMolecule& if (b.HasMember("customQuery")) { std::string customQuery = b["customQuery"].GetString(); - // 2do process custom query + std::unique_ptr bond = make_unique(); + SmilesLoader::readSmartsBondStr(customQuery, bond); + _pqmol->resetBond(bond_idx, bond.release()); } if (b.HasMember("cip")) @@ -865,6 +871,8 @@ void MoleculeJsonLoader::parseSGroups(const rapidjson::Value& sgroups, BaseMolec _pqmol->components[atom_idx] = components_count; } } + else + throw Error("queryProperties is allowed only for queries"); continue; } int sg_type = SGroup::getType(sg_type_str.c_str()); diff --git a/core/indigo-core/molecule/src/molecule_json_saver.cpp b/core/indigo-core/molecule/src/molecule_json_saver.cpp index 3fc0b7bca7..adae77f582 100644 --- a/core/indigo-core/molecule/src/molecule_json_saver.cpp +++ b/core/indigo-core/molecule/src/molecule_json_saver.cpp @@ -26,6 +26,7 @@ #include "molecule/molecule_json_saver.h" #include "molecule/molecule_savers.h" #include "molecule/query_molecule.h" +#include "molecule/smiles_saver.h" using namespace indigo; using namespace rapidjson; @@ -416,7 +417,8 @@ void MoleculeJsonSaver::saveBonds(BaseMolecule& mol, JsonWriter& writer) int bond_order = mol.getBondOrder(i); if (bond_order < 0 && _pqmol) { - int qb = QueryMolecule::getQueryBondType(_pqmol->getBond(i)); + QueryMolecule::Bond& qbond = _pqmol->getBond(i); + int qb = QueryMolecule::getQueryBondType(qbond); if (qb == QueryMolecule::QUERY_BOND_SINGLE_OR_DOUBLE) bond_order = 5; else if (qb == QueryMolecule::QUERY_BOND_SINGLE_OR_AROMATIC) @@ -426,7 +428,13 @@ void MoleculeJsonSaver::saveBonds(BaseMolecule& mol, JsonWriter& writer) else if (qb == QueryMolecule::QUERY_BOND_ANY) bond_order = 8; if (bond_order < 0) - throw Error("Invalid query bond"); + { + // throw Error("Invalid query bond"); + + std::string customQuery = SmilesSaver::writeSmartsBondStr(&qbond); + writer.Key("customQuery"); + writer.String(customQuery.c_str()); + } } if (bond_order == BOND_ZERO && _pmol) @@ -886,8 +894,7 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer) writer.StartObject(); if (needCustomQuery) { - // 2do generate customquery - std::string customQuery = ""; + std::string customQuery = SmilesSaver::writeSmartsAtomStr(&atom); writer.Key("customQuery"); writer.String(customQuery.c_str()); } diff --git a/core/indigo-core/molecule/src/smiles_loader.cpp b/core/indigo-core/molecule/src/smiles_loader.cpp index 275f1fef7b..d95ad8112e 100644 --- a/core/indigo-core/molecule/src/smiles_loader.cpp +++ b/core/indigo-core/molecule/src/smiles_loader.cpp @@ -1431,7 +1431,7 @@ void SmilesLoader::_parseMolecule() std::unique_ptr qbond = std::make_unique(); bond_str.readString(_pending_bonds_pool.at(_cycles[number].pending_bond_str), false); - _readBond(bond_str, *bond, qbond); + _readBond(bond_str, *bond, qbond, smarts_mode); bond->index = _qmol->addBond(bond->beg, bond->end, qbond.release()); } @@ -1542,7 +1542,7 @@ void SmilesLoader::_parseMolecule() // Such case is processed after } else - _readBond(bond_str, *bond, qbond); + _readBond(bond_str, *bond, qbond, smarts_mode); // The bond "directions" are already saved in _BondDesc::dir, // so we can safely discard them. We are doing that to succeed @@ -1625,7 +1625,7 @@ void SmilesLoader::_parseMolecule() else { bond_str.readString(str, false); - _readBond(bond_str, *bond, qbond); + _readBond(bond_str, *bond, qbond, smarts_mode); } if (_qmol != 0) @@ -1714,7 +1714,7 @@ void SmilesLoader::_parseMolecule() atom_str.push(_scanner.readChar()); } - _readAtom(atom_str, brackets, atom, qatom); + _readAtom(atom_str, brackets, atom, qatom, smarts_mode, inside_rsmiles); atom.brackets = brackets; if (_qmol != 0) @@ -2443,7 +2443,15 @@ void SmilesLoader::_loadMolecule() _validateStereoCenters(); } -void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond) +void SmilesLoader::readSmartsBondStr(const std::string& bond_str, std::unique_ptr& qbond) +{ + _BondDesc bond; + Array ac_str; + ac_str.copy(bond_str.c_str(), bond_str.size()); + _readBond(ac_str, bond, qbond, true); +} + +void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond, bool smarts_mode) { if (bond_str.find(';') != -1) { @@ -2451,7 +2459,7 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique std::unique_ptr subqbond; int i; - if (_qmol == 0) + if (qbond == nullptr) throw Error("';' is allowed only within queries"); substring.clear(); @@ -2460,7 +2468,7 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique if (i == bond_str.size() || bond_str[i] == ';') { subqbond = std::make_unique(); - _readBond(substring, bond, subqbond); + _readBond(substring, bond, subqbond, smarts_mode); qbond.reset(QueryMolecule::Bond::und(qbond.release(), subqbond.release())); substring.clear(); } @@ -2475,7 +2483,7 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique std::unique_ptr subqbond; int i; - if (_qmol == 0) + if (qbond == nullptr) throw Error("',' is allowed only within queries"); substring.clear(); @@ -2484,7 +2492,7 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique if (i == bond_str.size() || bond_str[i] == ',') { subqbond = std::make_unique(); - _readBond(substring, bond, subqbond); + _readBond(substring, bond, subqbond, smarts_mode); if (qbond->type == 0) qbond.reset(subqbond.release()); else @@ -2502,7 +2510,7 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique std::unique_ptr subqbond; int i; - if (_qmol == 0) + if (qbond == nullptr) throw Error("'&' is allowed only within queries"); substring.clear(); @@ -2511,7 +2519,7 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique if (i == bond_str.size() || bond_str[i] == '&') { subqbond = std::make_unique(); - _readBond(substring, bond, subqbond); + _readBond(substring, bond, subqbond, smarts_mode); qbond.reset(QueryMolecule::Bond::und(qbond.release(), subqbond.release())); substring.clear(); } @@ -2520,10 +2528,10 @@ void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique } return; } - _readBondSub(bond_str, bond, qbond); + _readBondSub(bond_str, bond, qbond, smarts_mode); } -void SmilesLoader::_readBondSub(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond) +void SmilesLoader::_readBondSub(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond, bool smarts_mode) { BufferScanner scanner(bond_str); @@ -2640,7 +2648,8 @@ void SmilesLoader::_readBondSub(Array& bond_str, _BondDesc& bond, std::uni } } -bool SmilesLoader::_readAtomLogic(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom) +bool SmilesLoader::_readAtomLogic(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom, bool smarts_mode, + bool inside_rsmiles) { QS_DEF(Array, atom_str_copy); if (atom_str.size() < 1) @@ -2683,7 +2692,7 @@ bool SmilesLoader::_readAtomLogic(Array& atom_str, bool first_in_brackets, if (i == atom_str.size() || atom_str_copy[i] == ';') { subqatom = std::make_unique(); - _readAtom(substring, first_in_brackets && (k == 0), atom, subqatom); + _readAtom(substring, first_in_brackets && (k == 0), atom, subqatom, smarts_mode, inside_rsmiles); qatom.reset(QueryMolecule::Atom::und(qatom.release(), subqatom.release())); substring.clear(); k++; @@ -2709,7 +2718,7 @@ bool SmilesLoader::_readAtomLogic(Array& atom_str, bool first_in_brackets, if (i == atom_str.size() || atom_str_copy[i] == ',') { subqatom = std::make_unique(); - _readAtom(substring, first_in_brackets && (k == 0), atom, subqatom); + _readAtom(substring, first_in_brackets && (k == 0), atom, subqatom, smarts_mode, inside_rsmiles); if (qatom->type == 0) qatom.reset(subqatom.release()); else @@ -2738,7 +2747,7 @@ bool SmilesLoader::_readAtomLogic(Array& atom_str, bool first_in_brackets, if (i == atom_str.size() || atom_str_copy[i] == '&') { subqatom = std::make_unique(); - _readAtom(substring, first_in_brackets && (k == 0), atom, subqatom); + _readAtom(substring, first_in_brackets && (k == 0), atom, subqatom, smarts_mode, inside_rsmiles); qatom.reset(QueryMolecule::Atom::und(qatom.release(), subqatom.release())); substring.clear(); k++; @@ -2751,9 +2760,19 @@ bool SmilesLoader::_readAtomLogic(Array& atom_str, bool first_in_brackets, return true; } -void SmilesLoader::_readAtom(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom) +void SmilesLoader::readSmartsAtomStr(const std::string& atom_str, std::unique_ptr& qatom) +{ + Pool::Elem> neipool; + _AtomDesc atom{neipool}; + Array ac_str; + ac_str.copy(atom_str.c_str(), atom_str.size()); + _readAtom(ac_str, true, atom, qatom, true, false); +} + +void SmilesLoader::_readAtom(Array& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr& qatom, bool smarts_mode, + bool inside_rsmiles) { - if (!_readAtomLogic(atom_str, first_in_brackets, atom, qatom)) + if (!_readAtomLogic(atom_str, first_in_brackets, atom, qatom, smarts_mode, inside_rsmiles)) return; BufferScanner scanner(atom_str); @@ -3355,7 +3374,7 @@ void SmilesLoader::_readAtom(Array& atom_str, bool first_in_brackets, _Ato scanner.skip(1); if (scanner.lookNext() == '?') { - if (_qmol == 0) + if (qatom.get() == 0) throw Error("ignorable AAM numbers are allowed only for queries"); atom.ignorable_aam = true; scanner.skip(1); diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index efa5a69d3a..db185f0728 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -536,7 +536,7 @@ void SmilesSaver::_saveMolecule() else if ((dir == 2 && v_idx == edge.end) || (dir == 1 && v_idx == edge.beg)) _output.writeChar('\\'); else if (smarts_mode && _qmol != 0) - _writeSmartsBond(e_idx, &_qmol->getBond(e_idx), false); + _writeSmartsBond(_output, &_qmol->getBond(e_idx), false); else if (bond_order == BOND_DOUBLE) _output.writeChar('='); else if (bond_order == BOND_TRIPLE) @@ -592,7 +592,10 @@ void SmilesSaver::_saveMolecule() if (!smarts_mode) _writeAtom(v_idx, _atoms[v_idx].aromatic, _atoms[v_idx].lowercase, _atoms[v_idx].chirality); else if (_qmol != 0) - _writeSmartsAtom(v_idx, &_qmol->getAtom(v_idx), _atoms[v_idx].chirality, 0, false, false); + { + int aam = _bmol->reaction_atom_mapping[v_idx]; + _writeSmartsAtom(_output, &_qmol->getAtom(v_idx), aam, _atoms[v_idx].chirality, 0, false, false); + } else throw Error("SMARTS format available for query only!"); @@ -947,12 +950,21 @@ static void writeAnd(Output& _output, QueryMolecule::Node* node, bool has_or_par _output.writeChar(';'); } -void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chirality, int depth, bool has_or_parent, bool has_not_parent) const +std::string SmilesSaver::writeSmartsAtomStr(QueryMolecule::Atom* atom) +{ + Array out; + ArrayOutput output(out); + _writeSmartsAtom(output, atom, -1, -1, 1, false, false); + std::string result{out.ptr(), static_cast(out.size())}; + return result; +} + +void SmilesSaver::_writeSmartsAtom(Output& output, QueryMolecule::Atom* atom, int aam, int chirality, int depth, bool has_or_parent, bool has_not_parent) { int i; if (depth == 0) - _output.printf("["); + output.printf("["); switch (atom->type) { @@ -963,11 +975,11 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira } else if (QueryMolecule::isNotAtom(*atom, ELEM_H)) { - _output.printf("*"); + output.printf("*"); break; } - _output.writeChar('!'); - _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[0], chirality, depth + 1, has_or_parent, true); + output.writeChar('!'); + _writeSmartsAtom(output, static_cast(atom->children[0]), aam, chirality, depth + 1, has_or_parent, true); break; } case QueryMolecule::OP_AND: { @@ -975,7 +987,7 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira bool has_aromatic = false; bool aromatic = false; char atom_name[10]; - int cur_pos = _output.tell(); + int cur_pos = output.tell(); for (i = 0; i < atom->children.size(); i++) { if (atom->children[i]->type == QueryMolecule::ATOM_NUMBER) @@ -993,7 +1005,7 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira { // Convert a & #6 -> c, A & #6 -> C if (aromatic) atom_name[0] = tolower(atom_name[0]); - _output.printf("%s", atom_name); + output.printf("%s", atom_name); } for (i = 0; i < atom->children.size(); i++) { @@ -1011,12 +1023,12 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira continue; } - if (_output.tell() > cur_pos) + if (output.tell() > cur_pos) { - _output.writeChar(has_or_parent ? '&' : ';'); - cur_pos = _output.tell(); + output.writeChar(has_or_parent ? '&' : ';'); + cur_pos = output.tell(); } - _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, has_or_parent, has_not_parent); + _writeSmartsAtom(output, static_cast(atom->children[i]), aam, chirality, depth + 1, has_or_parent, has_not_parent); } break; } @@ -1029,35 +1041,23 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira } if (i > 0) - _output.printf(has_not_parent ? "!" : ","); - _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, true, has_not_parent); + output.printf(has_not_parent ? "!" : ","); + _writeSmartsAtom(output, static_cast(atom->children[i]), aam, chirality, depth + 1, true, has_not_parent); } break; } case QueryMolecule::ATOM_ISOTOPE: - _output.printf("%d", atom->value_max); + output.printf("%d", atom->value_max); break; case QueryMolecule::ATOM_NUMBER: { - _output.printf("#%d", atom->value_max); + output.printf("#%d", atom->value_max); if (chirality == 1) - _output.printf("@"); + output.printf("@"); else if (chirality == 2) - _output.printf("@@"); - - if (chirality > 0 || _bmol->getAtomRadical_NoThrow(idx, 0) > 0) - { - int hydro = _bmol->getAtomTotalH(idx); - - if (hydro > 1) - _output.printf("H%d", hydro); - else if (hydro == 1) - _output.printf("H"); - } - - int aam = _bmol->reaction_atom_mapping[idx]; + output.printf("@@"); if (aam > 0) - _output.printf(":%d", aam); + output.printf(":%d", aam); break; } @@ -1065,90 +1065,90 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira int charge = atom->value_max; if (charge > 1) - _output.printf("+%d", charge); + output.printf("+%d", charge); else if (charge < -1) - _output.printf("-%d", -charge); + output.printf("-%d", -charge); else if (charge == 1) - _output.printf("+"); + output.printf("+"); else if (charge == -1) - _output.printf("-"); + output.printf("-"); else - _output.printf("+0"); + output.printf("+0"); break; } case QueryMolecule::ATOM_FRAGMENT: { if (atom->fragment->fragment_smarts.ptr() == 0) throw Error("fragment_smarts has unexpectedly gone"); - _output.printf("$(%s)", atom->fragment->fragment_smarts.ptr()); + output.printf("$(%s)", atom->fragment->fragment_smarts.ptr()); break; } case QueryMolecule::ATOM_AROMATICITY: { if (atom->value_min == ATOM_AROMATIC) - _output.printf("a"); + output.printf("a"); else - _output.printf("A"); + output.printf("A"); break; } case QueryMolecule::OP_NONE: - _output.writeChar('*'); + output.writeChar('*'); break; case QueryMolecule::ATOM_TOTAL_H: { - _write_num(_output, 'H', atom->value_min); + _write_num(output, 'H', atom->value_min); break; } case QueryMolecule::ATOM_SSSR_RINGS: { - _write_num_if_set(_output, 'R', atom->value_min, atom->value_max); + _write_num_if_set(output, 'R', atom->value_min, atom->value_max); break; } case QueryMolecule::ATOM_RING_BONDS_AS_DRAWN: { - _output.printf("x:%d", atom->value_min); + output.printf("x:%d", atom->value_min); break; } case QueryMolecule::ATOM_RING_BONDS: { - _write_num_if_set(_output, 'x', atom->value_min, atom->value_max); + _write_num_if_set(output, 'x', atom->value_min, atom->value_max); break; } case QueryMolecule::ATOM_IMPLICIT_H: { - _write_num_if_set(_output, 'h', atom->value_min, atom->value_max); + _write_num_if_set(output, 'h', atom->value_min, atom->value_max); break; } case QueryMolecule::ATOM_UNSATURATION: { - _output.printf("$([*,#1]=,#,:[*,#1])"); + output.printf("$([*,#1]=,#,:[*,#1])"); break; } case QueryMolecule::ATOM_SMALLEST_RING_SIZE: { - _write_num_if_set(_output, 'r', atom->value_min, atom->value_max); + _write_num_if_set(output, 'r', atom->value_min, atom->value_max); break; } case QueryMolecule::ATOM_SUBSTITUENTS: { - _output.printf("D%d", atom->value_min); + output.printf("D%d", atom->value_min); break; } case QueryMolecule::ATOM_SUBSTITUENTS_AS_DRAWN: { - _output.printf("D%d", atom->value_min); + output.printf("D%d", atom->value_min); break; } case QueryMolecule::ATOM_PSEUDO: { - _output.printf("*", atom->alias.ptr()); + output.printf("*", atom->alias.ptr()); break; } case QueryMolecule::ATOM_CONNECTIVITY: { - _output.printf("X%d", atom->value_min); + output.printf("X%d", atom->value_min); break; } case QueryMolecule::ATOM_TOTAL_BOND_ORDER: { - _write_num(_output, 'v', atom->value_min); + _write_num(output, 'v', atom->value_min); break; } @@ -1159,10 +1159,19 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira } if (depth == 0) - _output.writeChar(']'); + output.writeChar(']'); } -void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_or_parent) const +std::string SmilesSaver::writeSmartsBondStr(QueryMolecule::Bond* bond) +{ + Array out; + ArrayOutput output(out); + _writeSmartsBond(output, bond, false); + std::string result{out.ptr(), static_cast(out.size())}; + return result; +} + +void SmilesSaver::_writeSmartsBond(Output& output, QueryMolecule::Bond* bond, bool has_or_parent) { int i; @@ -1170,26 +1179,26 @@ void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_ if (qb == QueryMolecule::QUERY_BOND_SINGLE_OR_DOUBLE) { - _output.writeString("-,="); + output.writeString("-,="); return; } switch (bond->type) { case QueryMolecule::OP_NONE: - _output.writeChar('~'); + output.writeChar('~'); break; case QueryMolecule::OP_NOT: { - _output.writeChar('!'); - _writeSmartsBond(idx, (QueryMolecule::Bond*)bond->children[0], has_or_parent); + output.writeChar('!'); + _writeSmartsBond(output, (QueryMolecule::Bond*)bond->children[0], has_or_parent); break; } case QueryMolecule::OP_OR: { for (i = 0; i < bond->children.size(); i++) { if (i > 0) - _output.printf(","); - _writeSmartsBond(idx, (QueryMolecule::Bond*)bond->children[i], true); + output.printf(","); + _writeSmartsBond(output, (QueryMolecule::Bond*)bond->children[i], true); } break; } @@ -1197,8 +1206,8 @@ void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_ for (i = 0; i < bond->children.size(); i++) { if (i > 0) - _output.writeChar(has_or_parent ? '&' : ';'); - _writeSmartsBond(idx, (QueryMolecule::Bond*)bond->children[i], has_or_parent); + output.writeChar(has_or_parent ? '&' : ';'); + _writeSmartsBond(output, (QueryMolecule::Bond*)bond->children[i], has_or_parent); } break; } @@ -1206,22 +1215,22 @@ void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_ int bond_order = bond->value; if (bond_order == BOND_SINGLE) - _output.writeChar('-'); + output.writeChar('-'); if (bond_order == BOND_DOUBLE) - _output.writeChar('='); + output.writeChar('='); else if (bond_order == BOND_TRIPLE) - _output.writeChar('#'); + output.writeChar('#'); else if (bond_order == BOND_AROMATIC) - _output.writeChar(':'); + output.writeChar(':'); else if (bond_order == BOND_SMARTS_UP) - _output.writeChar('/'); + output.writeChar('/'); else if (bond_order == BOND_SMARTS_DOWN) - _output.writeChar('\\'); + output.writeChar('\\'); break; } case QueryMolecule::BOND_TOPOLOGY: { if (bond->value == TOPOLOGY_RING) - _output.writeChar('@'); + output.writeChar('@'); break; } default:;