diff --git a/api/c/tests/unit/tests/formats.cpp b/api/c/tests/unit/tests/formats.cpp index b9010de1b5..dcf05d45a7 100644 --- a/api/c/tests/unit/tests/formats.cpp +++ b/api/c/tests/unit/tests/formats.cpp @@ -60,14 +60,14 @@ TEST_F(IndigoApiFormatsTest, molecule) EXPECT_EQ(7, indigoCountBonds(obj)); // 3 - const string expectedSmarts = "[#6;A]1-[#6;A]=[#6;A]-[#6;A]=[#6;A]-[#6;A]=1-[!#1]"; + const string expectedSmarts = "[C]1-[C]=[C]-[C]=[C]-[C]=1-[*]"; obj = indigoLoadStructureFromString(mStr.c_str(), "smarts"); EXPECT_STREQ(expectedSmarts.c_str(), indigoSmarts(obj)); EXPECT_EQ(7, indigoCountAtoms(obj)); EXPECT_EQ(7, indigoCountBonds(obj)); // 4 - const string expectedQuery = "[#6]1-[#6]=[#6]-[#6]=[#6]-[#6]=1-[!#1]"; + const string expectedQuery = "[#6]1-[#6]=[#6]-[#6]=[#6]-[#6]=1-[*]"; obj = indigoLoadStructureFromString(mStr.c_str(), "query"); EXPECT_STREQ(expectedQuery.c_str(), indigoSmarts(obj)); EXPECT_EQ(7, indigoCountAtoms(obj)); @@ -83,13 +83,13 @@ TEST_F(IndigoApiFormatsTest, reaction) { const string react = "C1=C(*)C=CC=C1>>C1=CC=CC(*)=C1"; const string expected = "C1C=CC=CC=1*>>C1C=C(*)C=CC=1"; + const string expected_smarts = "[C]1-[C]=[C]-[C]=[C]-[C]=1-[*]>>[C]1-[C]=[C](-[*])-[C]=[C]-[C]=1"; try { int obj = -1; obj = indigoLoadStructureFromString(react.c_str(), "smarts"); - EXPECT_STREQ(expected.c_str(), indigoSmiles(obj)); - EXPECT_STREQ(expected.c_str(), indigoCanonicalSmiles(obj)); + EXPECT_STREQ(expected_smarts.c_str(), indigoSmarts(obj)); EXPECT_EQ(1, indigoCountReactants(obj)); EXPECT_EQ(1, indigoCountProducts(obj)); EXPECT_EQ(2, indigoCountMolecules(obj)); diff --git a/api/tests/integration/common/rendering/__init__.py b/api/tests/integration/common/rendering/__init__.py index 5b80150aea..a72ccd4065 100644 --- a/api/tests/integration/common/rendering/__init__.py +++ b/api/tests/integration/common/rendering/__init__.py @@ -1,3 +1,4 @@ +import base64 import os import platform import sys @@ -204,14 +205,17 @@ def checkBitmapSimilarity(filename, 
ref_filename): return "%s rendering status: Problem: %s" % (filename, str(e)) channels = ["red", "green", "blue", "alpha"] + with open("%s/out/%s" % (dirname, filename), "rb") as file: + binary_data = file.read() for i, result in enumerate(results): if result > (HASH_SIZE**2) * 0.1: return ( - "%s rendering status: Problem: PNG similarity is %s for %s channel" + "%s rendering status: Problem: PNG similarity is %s for %s channel\n%s\n" % ( filename, round(1 - (result / float(HASH_SIZE**2)), 2), channels[i], + base64.b64encode(binary_data).decode("ascii"), ) ) diff --git a/api/tests/integration/ref/basic/load_structure.py.out b/api/tests/integration/ref/basic/load_structure.py.out index 96fb38bb49..cd0a855d06 100644 --- a/api/tests/integration/ref/basic/load_structure.py.out +++ b/api/tests/integration/ref/basic/load_structure.py.out @@ -26,7 +26,7 @@ C1C=CC=CC=1*>>C1C=C(*)C=CC=1 C1C=CC=CC=1* |$;;;;;;A$| (** #8 **): Smarts, (smarts parameter): -[#8;A;H]-[#6;a]1-,:[#6;a]-,:[#6;a]-,:[#6;a]-,:[#6;a]-,:[#6;a]-,:1 +[O;H]-[c]1-,:[c]-,:[c]-,:[c]-,:[c]-,:[c]-,:1 (** #9 **): Smarts, (query parameter): [OH]C1:C:C:C:C:C:1 diff --git a/api/tests/integration/ref/formats/smarts.py.out b/api/tests/integration/ref/formats/smarts.py.out index c61ce65b05..8c9ad54b94 100644 --- a/api/tests/integration/ref/formats/smarts.py.out +++ b/api/tests/integration/ref/formats/smarts.py.out @@ -9,3 +9,20 @@ CC[C+5]CCCCC **** Load and Save as Query with component-level grouping **** ([#8].[#6]) is ok. smarts_in==smarts_out ([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out +[!C;!b] is ok. smarts_in==smarts_out +[*] is ok. smarts_in==smarts_out +[*;R1] is ok. smarts_in==smarts_out +[*;R3] is ok. smarts_in==smarts_out +[r] is ok. smarts_in==smarts_out +[r0] is ok. smarts_in==smarts_out +[r1] is ok. smarts_in==smarts_out +[r3] is ok. smarts_in==smarts_out +[v] is ok. smarts_in==smarts_out +[v0] is ok. smarts_in==smarts_out +[v3] is ok. smarts_in==smarts_out +[+0] is ok. smarts_in==smarts_out +[#6]@[#6] is ok. 
smarts_in==smarts_out +[#9]/[#6] is ok. smarts_in==smarts_out +[#9]/[#6]=[C]/[#17] is ok. smarts_in==smarts_out +[O;H] is ok. smarts_in==smarts_out +[!O;H] is ok. smarts_in==smarts_out diff --git a/api/tests/integration/test.py b/api/tests/integration/test.py index 66d765239f..0140c7b4a1 100644 --- a/api/tests/integration/test.py +++ b/api/tests/integration/test.py @@ -346,6 +346,8 @@ def run_analyze_test(args): f.close() with stdout_lock: sys.__stdout__.write(out_message + "\n") + if test_status == "[FAILED]": + sys.__stdout__.write(msg + "\n") test_result = (root, filename, test_status, msg, tspent) return test_result diff --git a/api/tests/integration/tests/formats/smarts.py b/api/tests/integration/tests/formats/smarts.py old mode 100644 new mode 100755 index 6d4c4b039a..b421fd6801 --- a/api/tests/integration/tests/formats/smarts.py +++ b/api/tests/integration/tests/formats/smarts.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import os import sys @@ -23,8 +24,8 @@ def test_smarts_load_save(smarts_in): print("%s is ok. 
smarts_in==smarts_out" % smarts_in) else: print("smarts_in!=smarts_out") - print(" smarts_in=%s", smarts_in) - print("smarts_out=%s", smarts_out) + print(" smarts_in=%s" % smarts_in) + print("smarts_out=%s" % smarts_out) molstr = """ @@ -96,3 +97,21 @@ def test_smarts_load_save(smarts_in): print("**** Load and Save as Query with component-level grouping ****") test_smarts_load_save("([#8].[#6])") test_smarts_load_save("([#8].[#6]).([#8].[#6])") + +test_smarts_load_save("[!C;!b]") +test_smarts_load_save("[*]") +test_smarts_load_save("[*;R1]") +test_smarts_load_save("[*;R3]") +test_smarts_load_save("[r]") +test_smarts_load_save("[r0]") +test_smarts_load_save("[r1]") +test_smarts_load_save("[r3]") +test_smarts_load_save("[v]") +test_smarts_load_save("[v0]") +test_smarts_load_save("[v3]") +test_smarts_load_save("[+0]") +test_smarts_load_save("[#6]@[#6]") +test_smarts_load_save("[#9]/[#6]") +test_smarts_load_save("[#9]/[#6]=[C]/[#17]") +test_smarts_load_save("[O;H]") +test_smarts_load_save("[!O;H]") diff --git a/api/tests/integration/tests/rendering/ref/linux/smarts/0190.png b/api/tests/integration/tests/rendering/ref/linux/smarts/0190.png index 0e59d707d1..cbece9861c 100644 Binary files a/api/tests/integration/tests/rendering/ref/linux/smarts/0190.png and b/api/tests/integration/tests/rendering/ref/linux/smarts/0190.png differ diff --git a/api/tests/integration/tests/rendering/ref/mac/smarts/0190.png b/api/tests/integration/tests/rendering/ref/mac/smarts/0190.png index 82ce5b0873..4464b5cb2f 100644 Binary files a/api/tests/integration/tests/rendering/ref/mac/smarts/0190.png and b/api/tests/integration/tests/rendering/ref/mac/smarts/0190.png differ diff --git a/api/tests/integration/tests/rendering/ref/win/smarts/0190.png b/api/tests/integration/tests/rendering/ref/win/smarts/0190.png index 9ded79fd4d..6a789ead35 100644 Binary files a/api/tests/integration/tests/rendering/ref/win/smarts/0190.png and b/api/tests/integration/tests/rendering/ref/win/smarts/0190.png differ 
diff --git a/core/indigo-core/common/base_cpp/output.h b/core/indigo-core/common/base_cpp/output.h index 2ae6679a0a..be40558b23 100644 --- a/core/indigo-core/common/base_cpp/output.h +++ b/core/indigo-core/common/base_cpp/output.h @@ -41,6 +41,10 @@ namespace indigo virtual void write(const void* data, int size) = 0; virtual void flush() = 0; + virtual long long tell() const noexcept + { + return 0; + } virtual void writeByte(byte value); @@ -63,18 +67,13 @@ namespace indigo void printfCR(const char* format, ...); }; - class DLLEXPORT OutputTell - { - virtual long long tell() const noexcept = 0; - }; - class DLLEXPORT OutputSeek { virtual void seek(long long offset, int from) = 0; void skip(int count); }; - class DLLEXPORT FileOutput : public Output, public OutputSeek, public OutputTell + class DLLEXPORT FileOutput : public Output, public OutputSeek { public: FileOutput(Encoding filename_encoding, const char* filename); @@ -96,7 +95,7 @@ namespace indigo FILE* _file; }; - class DLLEXPORT ArrayOutput : public Output, public OutputTell + class DLLEXPORT ArrayOutput : public Output { public: explicit ArrayOutput(Array& arr); @@ -111,7 +110,7 @@ namespace indigo Array& _arr; }; - class DLLEXPORT StringOutput : public Output, public OutputTell + class DLLEXPORT StringOutput : public Output { public: StringOutput() = delete; @@ -127,7 +126,7 @@ namespace indigo std::string& _str; }; - class DLLEXPORT StandardOutput : public Output, public OutputTell + class DLLEXPORT StandardOutput : public Output { public: explicit StandardOutput(); diff --git a/core/indigo-core/common/gzip/gzip_output.h b/core/indigo-core/common/gzip/gzip_output.h index d5653fa79e..6101e531c1 100644 --- a/core/indigo-core/common/gzip/gzip_output.h +++ b/core/indigo-core/common/gzip/gzip_output.h @@ -27,7 +27,7 @@ namespace indigo { - class GZipOutput : public Output, OutputTell + class GZipOutput : public Output { public: enum diff --git a/core/indigo-core/molecule/base_molecule.h 
b/core/indigo-core/molecule/base_molecule.h index ef49bd4ea8..b7f19d68c5 100644 --- a/core/indigo-core/molecule/base_molecule.h +++ b/core/indigo-core/molecule/base_molecule.h @@ -69,7 +69,9 @@ namespace indigo _BOND_DOUBLE_OR_AROMATIC = 7, _BOND_ANY = 8, _BOND_COORDINATION = 9, - _BOND_HYDROGEN = 10 + _BOND_HYDROGEN = 10, + BOND_SMARTS_UP = 11, + BOND_SMARTS_DOWN = 12, }; enum diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index cd98fa75fd..b3f8c78ebf 100644 --- a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -97,6 +97,7 @@ namespace indigo ATOM_TEMPLATE_SEQID, ATOM_TEMPLATE_CLASS, ATOM_PI_BONDED, + ATOM_CHILARITY, BOND_ORDER, BOND_TOPOLOGY, @@ -117,6 +118,8 @@ namespace indigo // otherwise: no children PtrArray children; + bool artificial; // if true - added by parser to comply restrictions + // Check if node has any constraint of the specific type bool hasConstraint(int what_type); @@ -137,6 +140,8 @@ namespace indigo bool sureValueBelongs(int what_type, const int* arr, int count); bool sureValueBelongsInv(int what_type, const int* arr, int count); + bool hasOP_OR(); + // Optimize query for faster substructure search void optimize(); diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index 6a46a3175c..17ae527d92 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -506,7 +506,7 @@ bool QueryMolecule::isSaturatedAtom(int idx) throw Error("not implemented"); } -QueryMolecule::Node::Node(int type_) +QueryMolecule::Node::Node(int type_) : artificial(false) { type = (OpType)type_; } @@ -1186,6 +1186,29 @@ bool QueryMolecule::Node::sureValueBelongs(int what_type, const int* arr, int co } } +bool QueryMolecule::Node::hasOP_OR() +{ + int i; + + switch (type) + { + case OP_AND: { + for (i = 0; i < children.size(); i++) + if 
(children[i]->hasOP_OR()) + return true; + + return false; + } + case OP_OR: { + return true; + } + case OP_NOT: + return false; + default: + return false; + } +} + QueryMolecule::Atom* QueryMolecule::Atom::sureConstraint(int what_type) { int count = 0; diff --git a/core/indigo-core/molecule/src/smiles_loader.cpp b/core/indigo-core/molecule/src/smiles_loader.cpp index 71a519e3da..275f1fef7b 100644 --- a/core/indigo-core/molecule/src/smiles_loader.cpp +++ b/core/indigo-core/molecule/src/smiles_loader.cpp @@ -2148,8 +2148,9 @@ void SmilesLoader::_forbidHydrogens() std::unique_ptr newatom; std::unique_ptr oldatom(_qmol->releaseAtom(i)); - newatom.reset( - QueryMolecule::Atom::und(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)), oldatom.release())); + std::unique_ptr notH(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H))); + notH->artificial = true; + newatom.reset(QueryMolecule::Atom::und(notH.release(), oldatom.release())); _qmol->resetAtom(i, newatom.release()); } @@ -2565,7 +2566,10 @@ void SmilesLoader::_readBondSub(Array& bond_str, _BondDesc& bond, std::uni else if (next == '/') { scanner.skip(1); - order = BOND_SINGLE; + if (smarts_mode) + order = BOND_SMARTS_UP; + else + order = BOND_SINGLE; if (bond.dir == 2) throw Error("Specificiation of both cis- and trans- bond restriction is not supported yet."); bond.dir = 1; @@ -2573,7 +2577,10 @@ void SmilesLoader::_readBondSub(Array& bond_str, _BondDesc& bond, std::uni else if (next == '\\') { scanner.skip(1); - order = BOND_SINGLE; + if (smarts_mode) + order = BOND_SMARTS_DOWN; + else + order = BOND_SINGLE; if (bond.dir == 1) throw Error("Specificiation of both cis- and trans- bond restriction is not supported yet."); bond.dir = 2; @@ -3301,6 +3308,8 @@ void SmilesLoader::_readAtom(Array& atom_str, bool first_in_brackets, _Ato if (isdigit(scanner.lookNext())) subatom = std::make_unique(QueryMolecule::ATOM_SMALLEST_RING_SIZE, 
scanner.readUnsigned()); + else if (smarts_mode) + subatom = std::make_unique(QueryMolecule::ATOM_SMALLEST_RING_SIZE, 1, 100); else subatom = std::make_unique(QueryMolecule::ATOM_RING_BONDS, 1, 100); } diff --git a/core/indigo-core/molecule/src/smiles_saver.cpp b/core/indigo-core/molecule/src/smiles_saver.cpp index 7a7413838f..efa5a69d3a 100644 --- a/core/indigo-core/molecule/src/smiles_saver.cpp +++ b/core/indigo-core/molecule/src/smiles_saver.cpp @@ -922,6 +922,31 @@ void SmilesSaver::_writeCharge(int charge) const _output.printf("-"); } +static void _write_num(indigo::Output& output, unsigned char ch, int num) +{ + output.writeChar(ch); + if (num != 1) + output.printf("%d", num); +} + +static void _write_num_if_set(indigo::Output& output, unsigned char ch, int min, int max) +{ + if (min == 1 && max == 100) + output.writeChar(ch); + else + { + output.printf("%c%d", ch, min); + } +} + +static void writeAnd(Output& _output, QueryMolecule::Node* node, bool has_or_parent) +{ + if (has_or_parent) + _output.writeChar('&'); + else if (node->hasOP_OR()) + _output.writeChar(';'); +} + void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chirality, int depth, bool has_or_parent, bool has_not_parent) const { int i; @@ -932,20 +957,65 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira switch (atom->type) { case QueryMolecule::OP_NOT: { + if (atom->artificial) // Skip atoms added by loader (!#1) + { + break; + } + else if (QueryMolecule::isNotAtom(*atom, ELEM_H)) + { + _output.printf("*"); + break; + } _output.writeChar('!'); _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[0], chirality, depth + 1, has_or_parent, true); break; } case QueryMolecule::OP_AND: { + bool has_number = false; + bool has_aromatic = false; + bool aromatic = false; + char atom_name[10]; + int cur_pos = _output.tell(); + for (i = 0; i < atom->children.size(); i++) + { + if (atom->children[i]->type == QueryMolecule::ATOM_NUMBER) + { + 
has_number = true; + strncpy(atom_name, Element::toString(static_cast<QueryMolecule::Atom*>(atom->children[i])->value_max), sizeof(atom_name)); + } + if (atom->children[i]->type == QueryMolecule::ATOM_AROMATICITY) + { + has_aromatic = true; + aromatic = static_cast<QueryMolecule::Atom*>(atom->children[i])->value_min == ATOM_AROMATIC; + } + } + if (has_aromatic && has_number) + { // Convert a & #6 -> c, A & #6 -> C + if (aromatic) + atom_name[0] = tolower(atom_name[0]); + _output.printf("%s", atom_name); + } for (i = 0; i < atom->children.size(); i++) { + if (has_aromatic && has_number && + (atom->children[i]->type == QueryMolecule::ATOM_AROMATICITY || atom->children[i]->type == QueryMolecule::ATOM_NUMBER)) + { + continue; + } if (atom->children[i]->type == QueryMolecule::ATOM_RADICAL || atom->children[i]->type == QueryMolecule::ATOM_VALENCE) { continue; } + if (atom->children[i]->type == QueryMolecule::OP_NOT && atom->children[i]->artificial) + { + continue; + } - if (i > 0) + if (_output.tell() > cur_pos) + { _output.writeChar(has_or_parent ? 
'&' : ';'); + cur_pos = _output.tell(); + } _writeSmartsAtom(idx, (QueryMolecule::Atom*)atom->children[i], chirality, depth + 1, has_or_parent, has_not_parent); } break; @@ -1002,6 +1072,8 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira _output.printf("+"); else if (charge == -1) _output.printf("-"); + else + _output.printf("+0"); break; } case QueryMolecule::ATOM_FRAGMENT: { @@ -1021,11 +1093,12 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira _output.writeChar('*'); break; case QueryMolecule::ATOM_TOTAL_H: { - int hydro = atom->value_min; - if (hydro == 1) - _output.printf("H"); - else - _output.printf("H%d", hydro); + _write_num(_output, 'H', atom->value_min); + break; + } + + case QueryMolecule::ATOM_SSSR_RINGS: { + _write_num_if_set(_output, 'R', atom->value_min, atom->value_max); break; } @@ -1035,24 +1108,12 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira } case QueryMolecule::ATOM_RING_BONDS: { - if (atom->value_min == 1 && atom->value_max == 100) - _output.printf("x"); - else - { - _output.printf("x%d", atom->value_min); - } + _write_num_if_set(_output, 'x', atom->value_min, atom->value_max); break; } case QueryMolecule::ATOM_IMPLICIT_H: { - if (atom->value_min == 1 && atom->value_max == 100) - { - _output.printf("h"); - } - else - { - _output.printf("h%d", atom->value_min); - } + _write_num_if_set(_output, 'h', atom->value_min, atom->value_max); break; } @@ -1062,6 +1123,7 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira } case QueryMolecule::ATOM_SMALLEST_RING_SIZE: { + _write_num_if_set(_output, 'r', atom->value_min, atom->value_max); break; } @@ -1085,6 +1147,11 @@ void SmilesSaver::_writeSmartsAtom(int idx, QueryMolecule::Atom* atom, int chira break; } + case QueryMolecule::ATOM_TOTAL_BOND_ORDER: { + _write_num(_output, 'v', atom->value_min); + break; + } + default: { throw Error("Unknown atom attribute %d", 
atom->type); break; @@ -1146,6 +1213,15 @@ void SmilesSaver::_writeSmartsBond(int idx, QueryMolecule::Bond* bond, bool has_ _output.writeChar('#'); else if (bond_order == BOND_AROMATIC) _output.writeChar(':'); + else if (bond_order == BOND_SMARTS_UP) + _output.writeChar('/'); + else if (bond_order == BOND_SMARTS_DOWN) + _output.writeChar('\\'); + break; + } + case QueryMolecule::BOND_TOPOLOGY: { + if (bond->value == TOPOLOGY_RING) + _output.writeChar('@'); break; } default:;