diff --git a/api/tests/integration/ref/formats/incorrect_smiles.py.out b/api/tests/integration/ref/formats/incorrect_smiles.py.out index 49c798fa27..fc0d05ff72 100644 --- a/api/tests/integration/ref/formats/incorrect_smiles.py.out +++ b/api/tests/integration/ref/formats/incorrect_smiles.py.out @@ -1,3 +1,3 @@ -O[C@H](N)C |&1:0| molecule auto loader: SMILES loader: atom 0 is not a stereocenter -O[C@H](N)C |o1:0| molecule auto loader: SMILES loader: atom 0 is not a stereocenter -O[C@H](N)C |a:3| molecule auto loader: SMILES loader: atom 3 is not a stereocenter +O[C@H](N)C |&1:0| molecule auto loader: stereocenters: stereocenter (0) pyramid must have at least 3 atoms +O[C@H](N)C |o1:0| molecule auto loader: stereocenters: stereocenter (0) pyramid must have at least 3 atoms +O[C@H](N)C |a:3| molecule auto loader: stereocenters: stereocenter (3) pyramid must have at least 3 atoms diff --git a/api/tests/integration/ref/formats/mol_to_smiles.py.out b/api/tests/integration/ref/formats/mol_to_smiles.py.out index 6125b3a629..900d00b2b0 100644 --- a/api/tests/integration/ref/formats/mol_to_smiles.py.out +++ b/api/tests/integration/ref/formats/mol_to_smiles.py.out @@ -1,2 +1,3 @@ C1%82(C%83O%84%85)OC%86(C(O%87%88%89)C(O%90%91)C%921O%93%94%95)O%96%97%98.[*:1]%96.[*:2]%93.[*:3]%84.[*:4]%90.[*:5]%87.[*:6]%94%85.[*:7]%95%97.[*:8]%83.[*:9]%91%88.[*:10]%89%98.[*:11]%86.[*:12]%92.[*:13]%82 |$;;;;;;;;;;;;_R1;_R2;_R3;_R4;_R5;_R6;_R7;_R8;_R9;_R10;_R11;_R12;_R13$| [O-][N+](C1=NN=NN1CC1N=NNN=1)=O +C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1 |o1:3,r,wU:3.12| diff --git a/api/tests/integration/ref/formats/smiles.py.out b/api/tests/integration/ref/formats/smiles.py.out index 53e708998e..2292c76912 100644 --- a/api/tests/integration/ref/formats/smiles.py.out +++ b/api/tests/integration/ref/formats/smiles.py.out @@ -71,3 +71,6 @@ chemaxon: CCCC |Sg:n:0,1,2::ht| chemaxon: CCCCC |Sg:n:1,2,3::hh| +*** Atropisomers *** +atropisomer: +C1=CC=C(C)C(C2=C(N)C=C(C)C=C2)=C1O |o1:5,r,wU:5.4| diff --git 
a/api/tests/integration/tests/formats/mol_to_smiles.py b/api/tests/integration/tests/formats/mol_to_smiles.py index 4e298f6973..d502327f93 100644 --- a/api/tests/integration/tests/formats/mol_to_smiles.py +++ b/api/tests/integration/tests/formats/mol_to_smiles.py @@ -19,3 +19,10 @@ joinPathPy("molecules/1e-0.mol", __file__) ).smiles() ) + +indigo.setOption("ignore-stereochemistry-errors", True) +print( + indigo.loadMoleculeFromFile( + joinPathPy("molecules/atropisomer.mol", __file__) + ).smiles() +) diff --git a/api/tests/integration/tests/formats/smiles.py b/api/tests/integration/tests/formats/smiles.py index 144a8ed74f..43b7c866f6 100644 --- a/api/tests/integration/tests/formats/smiles.py +++ b/api/tests/integration/tests/formats/smiles.py @@ -115,3 +115,9 @@ for sm in mols_smiles: print("chemaxon:") print(indigo.loadMolecule(sm).smiles()) + +print("*** Atropisomers ***") +mols_smiles = ["C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1 |o1:3,r,wU:3.12|"] +for sm in mols_smiles: + print("atropisomer:") + print(indigo.loadMolecule(sm).smiles()) diff --git a/core/indigo-core/molecule/molecule_stereocenters.h b/core/indigo-core/molecule/molecule_stereocenters.h index f0663ca621..faa1e68218 100644 --- a/core/indigo-core/molecule/molecule_stereocenters.h +++ b/core/indigo-core/molecule/molecule_stereocenters.h @@ -82,6 +82,8 @@ namespace indigo int* getPyramid(int idx); void setType(int idx, int type, int group); void setType(int idx, int type); + void setAtropisomeric(int idx, bool val); + bool isAtropisomeric(int idx); void invertPyramid(int idx); bool sameGroup(int idx1, int idx2); @@ -137,6 +139,7 @@ namespace indigo // [X, Y, Z, W] -- atom indices or -1 for implicit hydrogen // (X, Y, Z) go counterclockwise when looking from W. 
// if there are pure (implicit) hydrogen, it is W + bool is_atropisomeric = false; int pyramid[4]; }; diff --git a/core/indigo-core/molecule/smiles_loader.h b/core/indigo-core/molecule/smiles_loader.h index c77d552709..f80adafec4 100644 --- a/core/indigo-core/molecule/smiles_loader.h +++ b/core/indigo-core/molecule/smiles_loader.h @@ -164,6 +164,7 @@ namespace indigo void _loadMolecule(); void _parseMolecule(); void _loadParsedMolecule(); + void _validateStereoCenters(); void _calcStereocenters(); void _calcCisTrans(); diff --git a/core/indigo-core/molecule/smiles_saver.h b/core/indigo-core/molecule/smiles_saver.h index 23cf5a1241..65981dff94 100644 --- a/core/indigo-core/molecule/smiles_saver.h +++ b/core/indigo-core/molecule/smiles_saver.h @@ -128,6 +128,7 @@ namespace indigo void _writeRingBonds(); void _writeUnsaturated(); void _writeSubstitutionCounts(); + void _writeWedges(); bool _shouldWriteAromaticBond(int bond_idx); void _startExtension(); diff --git a/core/indigo-core/molecule/src/molecule_json_loader.cpp b/core/indigo-core/molecule/src/molecule_json_loader.cpp index 16ab4c3da4..e84b55cecf 100644 --- a/core/indigo-core/molecule/src/molecule_json_loader.cpp +++ b/core/indigo-core/molecule/src/molecule_json_loader.cpp @@ -1107,16 +1107,31 @@ void MoleculeJsonLoader::loadMolecule(BaseMolecule& mol, bool load_arrows) { if (mol.stereocenters.getType(sc._atom_idx) == 0) { - if (!stereochemistry_options.ignore_errors) + if (mol.isAtropisomerismReferenceAtom(sc._atom_idx)) + { + mol.stereocenters.add_ignore(mol, sc._atom_idx, sc._type, sc._group, false); + mol.stereocenters.setAtropisomeric(sc._atom_idx, true); + } + else if (stereochemistry_options.ignore_errors) + mol.addStereocentersIgnoreBad(sc._atom_idx, sc._type, sc._group, false); // add non-valid stereocenters + else throw Error("stereo type specified for atom #%d, but the bond " "directions does not say that it is a stereocenter", sc._atom_idx); - mol.addStereocentersIgnoreBad(sc._atom_idx, sc._type, 
sc._group, false); // add non-valid stereocenters
             }
             else
                 mol.stereocenters.setType(sc._atom_idx, sc._type, sc._group);
         }
 
+        for (int i : mol.edges())
+        {
+            if (mol.getBondDirection(i) > 0 && !sensible_bond_directions[i])
+            {
+                if (!stereochemistry_options.ignore_errors)
+                    throw Error("direction of bond #%d makes no sense", i);
+            }
+        }
+
         MoleculeLayout ml(mol, false);
         ml.layout_orientation = UNCPECIFIED;
         ml.updateSGroups();
diff --git a/core/indigo-core/molecule/src/molecule_stereocenters.cpp b/core/indigo-core/molecule/src/molecule_stereocenters.cpp
index 7b96c2ba01..254e55b656 100644
--- a/core/indigo-core/molecule/src/molecule_stereocenters.cpp
+++ b/core/indigo-core/molecule/src/molecule_stereocenters.cpp
@@ -708,6 +708,20 @@ void MoleculeStereocenters::setType(int idx, int type)
     _stereocenters.at(idx).type = type;
 }
 
+// Sets the is_atropisomeric flag of the stereocenter stored under idx to val.
+void MoleculeStereocenters::setAtropisomeric(int idx, bool val)
+{
+    auto& center = _stereocenters.at(idx);
+    center.is_atropisomeric = val;
+}
+
+// Returns the is_atropisomeric flag of the stereocenter stored under idx.
+bool MoleculeStereocenters::isAtropisomeric(int idx)
+{
+    const auto& center = _stereocenters.at(idx);
+    return center.is_atropisomeric;
+}
+
 const int* MoleculeStereocenters::getPyramid(int idx) const
 {
     return _stereocenters.at(idx).pyramid;
diff --git a/core/indigo-core/molecule/src/molfile_loader.cpp b/core/indigo-core/molecule/src/molfile_loader.cpp
index 3b4a0492c5..970357a451 100644
--- a/core/indigo-core/molecule/src/molfile_loader.cpp
+++ b/core/indigo-core/molecule/src/molfile_loader.cpp
@@ -2064,7 +2064,10 @@ void MolfileLoader::_postLoad()
             if (_bmol->stereocenters.getType(i) == 0)
             {
                 if (_bmol->isAtropisomerismReferenceAtom(i))
+                {
                     _bmol->stereocenters.add_ignore(*_bmol, i, _stereocenter_types[i], _stereocenter_groups[i], false);
+                    _bmol->stereocenters.setAtropisomeric(i, true);
+                }
                 else if (!stereochemistry_options.ignore_errors)
                     throw Error("stereo type specified for atom #%d, but the bond "
                                 "directions does not say that it is a stereocenter",
@@ -2074,10 +2077,12 @@ void MolfileLoader::_postLoad()
                 _bmol->stereocenters.setType(i,
_stereocenter_types[i], _stereocenter_groups[i]); } - if (!stereochemistry_options.ignore_errors) - for (i = 0; i < _bonds_num; i++) - if (_bmol->getBondDirection(i) > 0 && !_sensible_bond_directions[i]) + for (i = 0; i < _bonds_num; i++) + if (_bmol->getBondDirection(i) > 0 && !_sensible_bond_directions[i]) + { + if (!stereochemistry_options.ignore_errors) throw Error("direction of bond #%d makes no sense", i); + } _bmol->buildCisTrans(_ignore_cistrans.ptr()); diff --git a/core/indigo-core/molecule/src/smiles_loader.cpp b/core/indigo-core/molecule/src/smiles_loader.cpp index d8c73e6a3d..71a519e3da 100644 --- a/core/indigo-core/molecule/src/smiles_loader.cpp +++ b/core/indigo-core/molecule/src/smiles_loader.cpp @@ -302,28 +302,26 @@ void SmilesLoader::_readOtherStuff() if (c == 'w') // 'ANY' stereocenters { - bool skip = true; - - // TODO: up/down designators (usually come atom coordinates) -- skipped for now + char wmode = 0; if (_scanner.lookNext() == 'U') + wmode = 'U'; + if (_scanner.lookNext() == 'D') + wmode = 'D'; + + if (wmode) _scanner.skip(1); - else if (_scanner.lookNext() == 'D') - _scanner.skip(1); - else - skip = false; if (_scanner.readChar() != ':') - throw Error("colon expected after 'w'"); + throw Error("colon expected after 'w%c'", wmode); while (isdigit(_scanner.lookNext())) { - int idx = _scanner.readUnsigned(); - - if (!skip) + int atom_idx = _scanner.readUnsigned(); + if (!wmode) { // This either bond can mark stereocenter or cis-trans double bond // For example CC=CN |w:1.0| - const Vertex& v = _bmol->getVertex(idx); + const Vertex& v = _bmol->getVertex(atom_idx); bool found = false; for (int nei : v.neighbors()) { @@ -337,25 +335,41 @@ void SmilesLoader::_readOtherStuff() if (!found) { - if (!_bmol->isPossibleStereocenter(idx)) + if (!_bmol->isPossibleStereocenter(atom_idx)) { if (!stereochemistry_options.ignore_errors) - throw Error("chirality not possible on atom #%d", idx); + throw Error("chirality not possible on atom #%d", atom_idx); } 
else { // Check if the stereocenter has already been marked as any // For example [H]C1(O)c2ccnn2[C@@H](O)c2ccnn12 |r,w:1.0,1.1| - if (_bmol->stereocenters.getType(idx) != MoleculeStereocenters::ATOM_ANY) - _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_ANY, 0, false); + if (_bmol->stereocenters.getType(atom_idx) != MoleculeStereocenters::ATOM_ANY) + _bmol->addStereocenters(atom_idx, MoleculeStereocenters::ATOM_ANY, 0, false); } } } - if (_scanner.lookNext() == '.') // skip the bond index + if (_scanner.lookNext() == '.') { _scanner.skip(1); - _scanner.readUnsigned(); + auto bond_idx = _scanner.readUnsigned(); + if (wmode) + { + auto& v = _bmol->getEdge(bond_idx); + if (v.end == atom_idx) + _bmol->swapEdgeEnds(bond_idx); + if (v.beg == atom_idx) + { + _bmol->setBondDirection(bond_idx, wmode == 'U' ? BOND_UP : BOND_DOWN); + if (_bmol->isAtropisomerismReferenceAtom(atom_idx)) + { + if (!_bmol->stereocenters.exists(atom_idx)) + _bmol->addStereocenters(atom_idx, MoleculeStereocenters::ATOM_ANY, 0, false); + _bmol->stereocenters.setAtropisomeric(atom_idx, true); + } + } + } } if (_scanner.lookNext() == ',') @@ -376,8 +390,8 @@ void SmilesLoader::_readOtherStuff() _bmol->stereocenters.setType(idx, MoleculeStereocenters::ATOM_ABS, 0); _overtly_defined_abs.insert(idx); } - else if (!stereochemistry_options.ignore_errors) - throw Error("atom %d is not a stereocenter", idx); + else + _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_ABS, 0, false); if (_scanner.lookNext() == ',') _scanner.skip(1); @@ -396,8 +410,8 @@ void SmilesLoader::_readOtherStuff() if (_bmol->stereocenters.exists(idx)) _bmol->stereocenters.setType(idx, MoleculeStereocenters::ATOM_OR, groupno); - else if (!stereochemistry_options.ignore_errors) - throw Error("atom %d is not a stereocenter", idx); + else + _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_OR, groupno, false); if (_scanner.lookNext() == ',') _scanner.skip(1); @@ -413,11 +427,10 @@ void 
SmilesLoader::_readOtherStuff()
                 while (isdigit(_scanner.lookNext()))
                 {
                     int idx = _scanner.readUnsigned();
-
                     if (_bmol->stereocenters.exists(idx))
                         _bmol->stereocenters.setType(idx, MoleculeStereocenters::ATOM_AND, groupno);
-                    else if (!stereochemistry_options.ignore_errors)
-                        throw Error("atom %d is not a stereocenter", idx);
+                    else
+                        _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_AND, groupno, false);
 
                     if (_scanner.lookNext() == ',')
                         _scanner.skip(1);
@@ -1327,6 +1340,34 @@ void SmilesLoader::_readOtherStuff()
         _bmol->removeAtoms(to_remove);
 }
 
+// Checks every registered stereocenter; called at the end of _loadMolecule().
+// A stereocenter is kept when its atom is a possible stereocenter or an
+// atropisomerism reference atom. Any other one is removed when
+// stereochemistry_options.ignore_errors is set; otherwise Error is thrown.
+void SmilesLoader::_validateStereoCenters()
+{
+    bool removed = true;
+    while (removed)
+    {
+        removed = false;
+        for (int i = _bmol->stereocenters.begin(); i < _bmol->stereocenters.end(); i = _bmol->stereocenters.next(i))
+        {
+            const auto atom_idx = _bmol->stereocenters.getAtomIndex(i);
+            if (_bmol->isPossibleStereocenter(atom_idx) || _bmol->isAtropisomerismReferenceAtom(atom_idx))
+                continue;
+            if (!stereochemistry_options.ignore_errors)
+                throw Error("atom %d is not a stereocenter", atom_idx);
+
+            // NOTE(review): remove() is assumed to take the atom index, like
+            // exists()/setType()/isAtropisomeric() - confirm against the header.
+            _bmol->stereocenters.remove(atom_idx);
+            // The entry behind i is gone, so next(i) must not be used; rescan.
+            removed = true;
+            break;
+        }
+    }
+}
+
 void SmilesLoader::loadSMARTS(QueryMolecule& mol)
 {
     mol.clear();
@@ -2412,6 +2453,7 @@ void SmilesLoader::_loadMolecule()
 
     _parseMolecule();
     _loadParsedMolecule();
+    _validateStereoCenters();
 }
 
 void SmilesLoader::_readBond(Array& bond_str, _BondDesc& bond, std::unique_ptr& qbond)
diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp
index c91ffc0af4..7a7413838f 100644
--- a/core/indigo-core/molecule/src/smiles_saver.cpp
+++ b/core/indigo-core/molecule/src/smiles_saver.cpp
@@ -303,7 +303,7 @@ void SmilesSaver::_saveMolecule()
 
         stereocenters.get(i, atom_idx, type, group, pyramid);
 
-        if (type < MoleculeStereocenters::ATOM_AND)
+        if (type < MoleculeStereocenters::ATOM_AND || stereocenters.isAtropisomeric(atom_idx))
             continue;
 
         int implicit_h_idx = -1;
@@ -685,6 +685,7 @@ void SmilesSaver::_saveMolecule()
     _writeRingBonds();
     _writeUnsaturated();
     _writeSubstitutionCounts();
+    _writeWedges();
 
     if (_comma)
_output.writeChar('|');
@@ -1896,6 +1897,46 @@ void SmilesSaver::_writeSubstitutionCounts()
     }
 }
 
+// Appends the CXSMILES wedge sections for atropisomers: every written bond
+// that starts at an atropisomeric stereocenter and is drawn up or down is
+// listed as "<atom>.<bond>", both numbers being positions in the written
+// atom/bond order. Up bonds go under "wU:" and down bonds under "wD:".
+// Writes nothing when there is no _bmol or no such bond.
+void SmilesSaver::_writeWedges()
+{
+    if (!_bmol)
+        return;
+
+    // One section per direction. A single list opened with the prefix of the
+    // first wedge would mislabel every later bond of the other direction.
+    const int directions[] = {BOND_UP, BOND_DOWN};
+    for (int dir : directions)
+    {
+        bool is_first = true;
+        for (int i = 0; i < _written_bonds.size(); ++i)
+        {
+            const auto bond_idx = _written_bonds[i];
+            if (_bmol->getBondDirection(bond_idx) != dir)
+                continue;
+
+            const auto& edge = _bmol->getEdge(bond_idx);
+            if (!_bmol->stereocenters.exists(edge.beg) || !_bmol->stereocenters.isAtropisomeric(edge.beg))
+                continue;
+
+            if (is_first)
+            {
+                _startExtension();
+                _output.writeString(dir == BOND_UP ? "wU:" : "wD:");
+                is_first = false;
+            }
+            else
+                _output.writeString(",");
+
+            _output.printf("%d.%d", _written_atoms.find(edge.beg), i);
+        }
+    }
+}
+
 void SmilesSaver::_writeRingBonds()
 {
     bool is_first = true;