Skip to content

Commit

Permalink
Backmerge: #1240 Unable to open the CDX file with an R-Group added to…
Browse files Browse the repository at this point in the history
… the whole structure (#1269)
  • Loading branch information
even1024 authored Oct 12, 2023
1 parent 5ee0dbc commit fabcf73
Show file tree
Hide file tree
Showing 10 changed files with 153 additions and 24 deletions.
1 change: 1 addition & 0 deletions api/tests/integration/ref/formats/cdx_export.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
enhanced_stereo1:success
enhanced_stereo2:success
enhanced_stereo3:success
rgroup:success
stereo_either-0020:success
two_bn:success
*** KET to CDX ***
Expand Down
1 change: 1 addition & 0 deletions api/tests/integration/tests/formats/cdx_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def find_diff(a, b):
"enhanced_stereo2",
"enhanced_stereo3",
"two_bn",
"rgroup",
]

ref_path = joinPathPy("ref/", __file__)
Expand Down
31 changes: 31 additions & 0 deletions api/tests/integration/tests/formats/molecules/rgroup.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

-INDIGO-08242309062D

0 0 0 0 0 0 0 0 0 0 0 V3000
M V30 BEGIN CTAB
M V30 COUNTS 0 0 0 0 0
M V30 BEGIN ATOM
M V30 END ATOM
M V30 BEGIN BOND
M V30 END BOND
M V30 END CTAB
M V30 BEGIN RGROUP 1
M V30 RLOGIC 0 0
M V30 BEGIN CTAB
M V30 COUNTS 5 4 0 0 0
M V30 BEGIN ATOM
M V30 1 C 20.2901 -12.6571 0.0 0
M V30 2 C 19.424 -12.1571 0.0 0
M V30 3 C 18.558 -12.6571 0.0 0
M V30 4 C 17.692 -12.1571 0.0 0
M V30 5 C 16.826 -12.6571 0.0 0 ATTCHPT=1
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 1
M V30 2 1 3 2
M V30 3 1 4 3
M V30 4 1 5 4
M V30 END BOND
M V30 END CTAB
M V30 END RGROUP
M END
1 change: 1 addition & 0 deletions api/tests/integration/tests/formats/ref/rgroup.b64cdx
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
VmpDRDAxMDAEAwIBAAAAAAAAAAAAAAAAAAAAAAUIBAAAAB4AGggCAAMAGwgCAAQAAAEkAAAAAgACAOn9BQBBcmlhbAMA6f0PAFRpbWVzIE5ldyBSb21hbgADMgAIAP///////wAAAAAAAP//AAAAAP////8AAAAA//8AAAAA/////wAAAAD/////AAD//wGAAAAAABAIAgABAA8IAgABAAOABAAAAAAACoAFAAAAAgsCAAEAA4AGAAAABIAHAAAAAAIIAIa2mQH4s34CAAAEgAgAAAAAAggAhraKAVC4ZAIAAASACQAAAAACCACGtpkBcL1KAgAABIAKAAAAAAIIAIa2igGQwjACAAAEgAsAAAAABAIADAAAAggAhraZAbDHFgIAAAWADAAAAAQGBAAIAAAABQYEAAcAAAAAAAWADQAAAAQGBAAJAAAABQYEAAgAAAAAAAWADgAAAAQGBAAKAAAABQYEAAkAAAAAAAWADwAAAAQGBAALAAAABQYEAAoAAAAAAAAABoAAAAAAAAIIAIa2igGwxxYCCAcBAAAABw4AAQAAAAMAYADIAAAAUjEAAAAAAAAAAAAA
15 changes: 15 additions & 0 deletions core/indigo-core/common/base_cpp/scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <stdio.h>
#include <string.h>

#include <array>
#include <cppcodec/base64_default_rfc4648.hpp>

#include "base_c/defs.h"
Expand Down Expand Up @@ -383,6 +384,20 @@ void Scanner::skipSpace()
skip(1);
}

/// Skips a UTF-8 byte order mark (bytes EF BB BF) located at the current
/// scanner position.
///
/// If the next three bytes are not a BOM, or fewer than three bytes remain,
/// the scanner position is left where it was on entry.
void Scanner::skipBom()
{
    constexpr int kBOMSize = 3;
    const std::array<unsigned char, kBOMSize> kBOM = {0xEF, 0xBB, 0xBF};

    const long long pos = tell();

    // Compare against the bytes left after the current position, not the
    // total stream size: otherwise a scanner positioned within the last two
    // bytes would attempt a fixed-size read past the end of the data.
    // NOTE(review): assumes length() reports the total stream size and tell()
    // the absolute offset - confirm against the Scanner implementations.
    if (length() - pos < kBOMSize)
        return;

    std::array<unsigned char, kBOMSize> bom;
    readCharsFix(kBOMSize, reinterpret_cast<char*>(bom.data()));

    // Not a BOM: rewind so the caller sees the same bytes again.
    if (bom != kBOM)
        seek(pos, SEEK_SET);
}

void Scanner::skipUntil(const char* delimiters)
{
while (strchr(delimiters, lookNext()) == nullptr)
Expand Down
1 change: 1 addition & 0 deletions core/indigo-core/common/base_cpp/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ namespace indigo
float readFloatFix(int digits);
int readIntFix(int digits);
void skipSpace();
void skipBom();

void skipUntil(const char* delimiters);

Expand Down
2 changes: 2 additions & 0 deletions core/indigo-core/molecule/molecule_cdxml_saver.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ namespace indigo
void addColorToTable(int id, int r, int g, int b);
void saveMoleculeFragment(BaseMolecule& mol, const Vec2f& offset, float scale, int frag_id, int& id, std::vector<int>& ids);
void saveMoleculeFragment(BaseMolecule& mol, const Vec2f& offset, float scale);
void saveRGroup(PtrPool<BaseMolecule>& fragments, const Vec2f& offset, int rgnum);

void addMetaObject(const MetaObject& obj, int id);
void addArrow(int id, int arrow_type, const Vec2f& beg, const Vec2f& end);
Expand Down Expand Up @@ -121,6 +122,7 @@ namespace indigo

protected:
void _collectSuperatoms(BaseMolecule& mol);
int _getAttachmentPoint(BaseMolecule& mol, int atom_idx);
Output& _output;

float _bond_length;
Expand Down
16 changes: 2 additions & 14 deletions core/indigo-core/molecule/src/molecule_auto_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ void MoleculeAutoLoader::_loadMolecule(BaseMolecule& mol)
}
}

_scanner->skipBom();

// check for MDLCT format
{
QS_DEF(Array<char>, buf);
Expand Down Expand Up @@ -344,17 +346,6 @@ void MoleculeAutoLoader::_loadMolecule(BaseMolecule& mol)
// check json format
long long pos = _scanner->tell();
{
bool hasbom = false;
if (_scanner->length() >= 3)
{
unsigned char bom[3];
_scanner->readCharsFix(3, (char*)bom);
if (bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
hasbom = true;
else
_scanner->seek(pos, SEEK_SET);
}

if (_scanner->lookNext() == '{')
{
if (_scanner->findWord("root") && _scanner->findWord("nodes"))
Expand All @@ -366,9 +357,6 @@ void MoleculeAutoLoader::_loadMolecule(BaseMolecule& mol)
_scanner->readAll(buf);
buf.push(0);
unsigned char* ptr = (unsigned char*)buf.ptr();
// skip utf8 BOM
if (hasbom)
ptr += 3;
Document data;
if (!data.Parse((char*)ptr).HasParseError())
{
Expand Down
34 changes: 25 additions & 9 deletions core/indigo-core/molecule/src/molecule_cdxml_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,18 @@ void MoleculeCdxmlLoader::_parseAltGroup(CDXElement elem)
{
std::vector<AutoInt> r_labels;
std::vector<CDXElement> r_fragments;

std::pair<Vec2f, Vec2f> bbox, text_frame, group_frame;
auto bbox_lambda = [&bbox, this](const std::string& data) { this->parseSeg(data, bbox.first, bbox.second); };
auto text_frame_lambda = [&text_frame, this](const std::string& data) { this->parseSeg(data, text_frame.first, text_frame.second); };
auto group_frame_lambda = [&group_frame, this](const std::string& data) { this->parseSeg(data, group_frame.first, group_frame.second); };

std::unordered_map<std::string, std::function<void(const std::string&)>> altgroup_dispatcher = {
{"BoundingBox", bbox_lambda}, {"TextFrame", text_frame_lambda}, {"GroupFrame", group_frame_lambda}};

auto prop = elem.firstProperty();
applyDispatcher(prop, altgroup_dispatcher);

for (auto r_elem = elem.firstChildElement(); r_elem.hasContent(); r_elem = r_elem.nextSiblingElement())
{
auto el_name = r_elem.name();
Expand All @@ -998,16 +1010,20 @@ void MoleculeCdxmlLoader::_parseAltGroup(CDXElement elem)
}
}

if (r_fragments.size() && r_labels.size())
if (r_labels.size())
{
MoleculeCdxmlLoader alt_loader(_scanner, _is_binary);
BaseMolecule& mol = _pmol ? *(BaseMolecule*)_pmol : *(BaseMolecule*)_pqmol;
std::unique_ptr<BaseMolecule> fragment(mol.neu());
alt_loader.stereochemistry_options = stereochemistry_options;
alt_loader.loadMoleculeFromFragment(*fragment.get(), r_fragments.front());
MoleculeRGroups& rgroups = mol.rgroups;
RGroup& rgroup = rgroups.getRGroup(r_labels.front());
rgroup.fragments.add(fragment.release());
// TODO: check whether any fragments lie inside group_frame and, if so, add them to r_fragments
if (r_fragments.size())
{
MoleculeCdxmlLoader alt_loader(_scanner, _is_binary);
BaseMolecule& mol = _pmol ? *(BaseMolecule*)_pmol : *(BaseMolecule*)_pqmol;
std::unique_ptr<BaseMolecule> fragment(mol.neu());
alt_loader.stereochemistry_options = stereochemistry_options;
alt_loader.loadMoleculeFromFragment(*fragment.get(), r_fragments.front());
MoleculeRGroups& rgroups = mol.rgroups;
RGroup& rgroup = rgroups.getRGroup(r_labels.front());
rgroup.fragments.add(fragment.release());
}
}
}

Expand Down
75 changes: 74 additions & 1 deletion core/indigo-core/molecule/src/molecule_cdxml_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,26 @@ void MoleculeCdxmlSaver::addDefaultColorTable()
addColorTable(color.ptr());
}

// Builds a bit mask describing which attachment-point orders reference the
// given atom.
//
// For every order in [1, mol.attachmentPointCount()] the entries returned by
// mol.getAttachmentPoint(order, slot) are scanned until the -1 terminator; if
// one of them equals atom_idx, bit (order - 1) is set in the result.
// Returns 0 when the atom is not listed under any order.
int MoleculeCdxmlSaver::_getAttachmentPoint(BaseMolecule& mol, int atom_idx)
{
    int mask = 0;
    const int order_count = mol.attachmentPointCount();

    for (int order = 1; order <= order_count; ++order)
    {
        int slot = 0;
        int attached = mol.getAttachmentPoint(order, slot);
        while (attached != -1)
        {
            if (attached == atom_idx)
            {
                // One hit per order is enough; move on to the next order.
                mask |= 1 << (order - 1);
                break;
            }
            attached = mol.getAttachmentPoint(order, ++slot);
        }
    }

    return mask;
}

void MoleculeCdxmlSaver::addNodeToFragment(BaseMolecule& mol, XMLElement* fragment, int atom_idx, const Vec2f& offset, Vec2f& min_coord, Vec2f& max_coord,
Vec2f& node_pos)
{
Expand Down Expand Up @@ -573,6 +593,11 @@ void MoleculeCdxmlSaver::addNodeToFragment(BaseMolecule& mol, XMLElement* fragme
if (hcount >= 0)
node->SetAttribute("NumHydrogens", hcount);
}

if (_getAttachmentPoint(mol, atom_idx))
{
node->SetAttribute("NodeType", "ExternalConnectionPoint");
}
}
else if (atom_number < 0)
{
Expand Down Expand Up @@ -1060,6 +1085,46 @@ void MoleculeCdxmlSaver::saveMoleculeFragment(BaseMolecule& mol, const Vec2f& of
saveMoleculeFragment(mol, offset, scale, -1, id, ids);
}

// Writes one R-group as an "altgroup" element under the current element.
//
// fragments - the R-group's member fragments; each one is saved through
//             saveMoleculeFragment() while the altgroup is the current element.
// offset    - offset forwarded to saveMoleculeFragment() and added to the
//             combined bounding box before the label is positioned.
// rgnum     - R-group number; used to build the text label "R<rgnum>".
//
// The altgroup receives a fresh "id" (++_id) and a "Valence" attribute equal
// to the sum of attachmentPointCount() over all fragments. On return _current
// points to the element that was current on entry, so consecutive calls
// produce sibling altgroups instead of nesting each one in the previous.
void MoleculeCdxmlSaver::saveRGroup(PtrPool<BaseMolecule>& fragments, const Vec2f& offset, int rgnum)
{
    XMLElement* parent = _current;
    XMLElement* fragment = _doc->NewElement("altgroup");
    _current->LinkEndChild(fragment);
    _current = fragment;
    fragment->SetAttribute("id", ++_id);

    // Union of the bounding boxes of all fragments, plus the total number of
    // attachment points.
    Vec2f rmin, rmax;
    int valence = 0;
    for (int i = fragments.begin(); i != fragments.end(); i = fragments.next(i))
    {
        Vec2f min_coord, max_coord;
        fragments[i]->getBoundingBox(min_coord, max_coord);
        if (i == fragments.begin())
        {
            rmin.copy(min_coord);
            rmax.copy(max_coord);
        }
        else
        {
            rmin.min(min_coord);
            rmax.max(max_coord);
        }
        saveMoleculeFragment(*fragments[i], offset, 1);
        valence += fragments[i]->attachmentPointCount();
    }

    // The label is placed at (min x, max y) of the shifted bounding box.
    std::string rg_name("R");
    rg_name += std::to_string(rgnum);
    rmin.add(offset);
    rmax.add(offset);
    Vec2f text_origin(rmin.x, rmax.y);
    addText(text_origin, rg_name.c_str(), nullptr);

    // NOTE(review): the previous code also built a scaled frame string from
    // rmin/rmax but never wrote it to the document; that dead computation was
    // dropped, and no frame attribute is emitted here.
    fragment->SetAttribute("Valence", valence);

    // Restore the insertion point saved on entry.
    _current = parent;
}

void MoleculeCdxmlSaver::saveMoleculeFragment(BaseMolecule& mol, const Vec2f& offset, float structure_scale, int frag_id, int& id, std::vector<int>& ids)
{
_atoms_ids.clear();
Expand Down Expand Up @@ -1407,7 +1472,8 @@ void MoleculeCdxmlSaver::addText(const Vec2f& pos, const char* text, const char*
out.printf("%f %f", _bond_length * pos.x, -_bond_length * pos.y);
buf.push(0);
t->SetAttribute("p", buf.ptr());
t->SetAttribute("Justification", alignment);
if (alignment)
t->SetAttribute("Justification", alignment);
t->SetAttribute("InterpretChemically", "no");

XMLElement* s = _doc->NewElement("s");
Expand Down Expand Up @@ -1800,6 +1866,13 @@ void MoleculeCdxmlSaver::saveMolecule(BaseMolecule& mol)
Vec2f offset(-min_coord.x, -max_coord.y);

saveMoleculeFragment(mol, offset, 1);
for (int i = 1; i <= mol.rgroups.getRGroupCount(); i++)
{
auto& rgrp = mol.rgroups.getRGroup(i);
if (rgrp.fragments.size())
saveRGroup(rgrp.fragments, offset, i);
}

endPage();
endDocument();
}

0 comments on commit fabcf73

Please sign in to comment.