Skip to content

Commit

Permalink
#1254 SMARTS with component-level grouping saved without '()'
Browse files Browse the repository at this point in the history
 Add '()' save support. Add UTs.
  • Loading branch information
Aliaksandr Dziarkach committed Sep 11, 2023
1 parent 7b25b5d commit c2397ac
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 42 deletions.
102 changes: 62 additions & 40 deletions core/indigo-core/molecule/src/smiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,45 +75,7 @@ void SmilesSaver::saveQueryMolecule(QueryMolecule& mol)
_bmol = &mol;
_qmol = &mol;
_mol = 0;
if (smarts_mode)
{
std::unordered_set<int> component_nums;
for (int i = 0; i < _qmol->components.size(); i++)
{
component_nums.insert(_qmol->components[i]);
}
if (component_nums.size() > 1)
{
std::unique_ptr<QueryMolecule> mol = std::make_unique<QueryMolecule>();
// decompose _qmol and save each component separately
std::list<std::unordered_set<int>>& extNeighbors = _qmol->getComponentNeighbors();
int fragment_count = _qmol->countComponents(extNeighbors);
for (int i = 0; i < fragment_count; ++i)
{
Array<int> mapping;
auto fragment = std::make_unique<QueryMolecule>();
Filter filt(_qmol->getDecomposition().ptr(), Filter::EQ, i);
fragment->makeSubmolecule(*_qmol, filt, &mapping, 0);
mol->mergeWithMolecule(*fragment, 0);
saveQueryMolecule(*fragment);
}
}
else
{
// _qmol.components contains only one value.
// If this value == 0 - no grouping used
// otherwise grouping used - SMARTS should be in parentheses
if (component_nums.count(0) == 0)
_output.writeChar('(');
_saveMolecule();
if (component_nums.count(0) == 0)
_output.writeChar(')');
}
}
else
{
_saveMolecule();
}
_saveMolecule();
}

void SmilesSaver::_saveMolecule()
Expand Down Expand Up @@ -223,6 +185,47 @@ void SmilesSaver::_saveMolecule()
walk.walk();

const Array<DfsWalk::SeqElem>& v_seq = walk.getSequence();
Array<int> v_to_comp_group;
v_to_comp_group.resize(v_seq.size());
v_to_comp_group.fffill();

if (_qmol != nullptr && smarts_mode)
{
if (v_seq.size() < 1)
return; // No atoms to save
std::set<int> components;
int cur_component = -1;
for (int i = 0; i < v_seq.size(); ++i)
{
// In v_seq, each fragment starts with a vertex whose parent is -1.
// In SMARTS, some fragments can be grouped (component-level grouping).
// In QueryMolecule the group number is stored in the "components" member. A group id of 0 means no group is defined.
// Each fragment is a connected graph, so all of its vertices should belong to one group.
// All fragments of a group should follow one another - in SMARTS they are written inside "()".
if (v_seq[i].parent_vertex < 0) // New Fragment
{
int new_component = _qmol->components[v_seq[i].idx];
// If a component is defined for the new fragment (id > 0), differs from the previous one, and has been seen before
if (new_component > 0 && new_component != cur_component && components.count(new_component))
{
// According to the DfsWalk code, fragments of the same group should be adjacent in the sequence.
// If a case is found where this is wrong, add code to rearrange the fragments.
throw Error("SMARTS fragments need to reaarange.");
}
components.emplace(new_component);
cur_component = new_component;
}
else
{
if (cur_component != _qmol->components[v_seq[i].idx])
{
// Fragment contains atoms from different components - something went wrong
throw Error("Fragment contains atoms from different components.");
}
}
v_to_comp_group[i] = cur_component;
}
}

// fill up neighbor lists for the stereocenters calculation
for (i = 0; i < v_seq.size(); i++)
Expand Down Expand Up @@ -563,8 +566,25 @@ void SmilesSaver::_saveMolecule()
else
{
if (!first_component)
{
// group == 0 means no group set.
int prev_group = v_to_comp_group[i - 1];
int new_group = v_to_comp_group[i];
bool different_groups = new_group != prev_group;
if (smarts_mode && prev_group && different_groups) // if component group ended
_output.writeChar(')');

_output.writeChar('.');
first_component = false;

if (smarts_mode && new_group && different_groups) // if new group started
_output.writeChar('(');
}
else
{
if (smarts_mode && v_to_comp_group[i] > 0) // component level grouping set for this fragment
_output.writeChar('(');
first_component = false;
}
_written_components++;
}
if (write_atom)
Expand Down Expand Up @@ -637,6 +657,8 @@ void SmilesSaver::_saveMolecule()
_output.writeString("{+n}");
}
}
if (smarts_mode && v_to_comp_group[i - 1]) // if group set for last fragment - add finish )
_output.writeChar(')');

if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS
{
Expand Down
4 changes: 2 additions & 2 deletions core/indigo-core/tests/tests/formats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ TEST_F(IndigoCoreFormatsTest, smarts_load_save)
QueryMolecule q_mol;

//std::string smarts_in{"([#8:1].[#6:2])"};
std::string smarts_in{"([#8:1].[#6:2]).([#8:1].[#6:2])"};
std::string smarts_in{"([#8].[#6]).([#6].[#8])"};
BufferScanner scanner(smarts_in.c_str());
SmilesLoader loader(scanner);
loader.smarts_mode = true;
Expand All @@ -340,6 +340,6 @@ TEST_F(IndigoCoreFormatsTest, smarts_load_save)
saver.smarts_mode = true;
saver.saveQueryMolecule(q_mol);
std::string smarts_out{out.ptr(), static_cast<std::size_t>(out.size())};
printf("smart_in=%s\nsmart_out=%s\n", smarts_in, smarts_out);
ASSERT_EQ(smarts_in, smarts_out);
}

0 comments on commit c2397ac

Please sign in to comment.