From ed66a2cd5b984b2f7fb17a8b22d5e471aa516c26 Mon Sep 17 00:00:00 2001 From: Jenke Scheen Date: Wed, 17 Apr 2024 16:36:40 +0200 Subject: [PATCH] choppa MVP --- choppa/render/.DS_Store | Bin 6148 -> 6148 bytes choppa/render/Template.html | 2 +- choppa/render/out.html | 2 +- choppa/render/render.py | 102 +++++++++++++++++++++++------------- choppa/render/utils.py | 54 +++++++++++++++++-- 5 files changed, 118 insertions(+), 42 deletions(-) diff --git a/choppa/render/.DS_Store b/choppa/render/.DS_Store index 4a67fb22747e34b8fd99b3fb9d5ceaf05d83ecb8..35dc4cfd8c75bc675df773fa7e8a1fa82aadc32a 100644 GIT binary patch delta 59 zcmZoMXffDe!pIbGWik(=)Z_q09=3f self.fitness_threshold] if len(fit_mutants) <= 6: # color by increasing redness the more fit mutants there are - color_res_dict[hex_color_codes[len(fit_mutants)]].append(resindex) + color_res_dict[HEX_COLOR_CODES[len(fit_mutants)]].append(resindex) else: # if there are more than 5 fit mutants just color it the most red - super mutable in any case - color_res_dict[hex_color_codes[6]].append(resindex) + color_res_dict[HEX_COLOR_CODES[6]].append(resindex) return color_res_dict @@ -377,13 +379,40 @@ def surface_coloring_dict_to_js(self, color_res_dict): def get_interaction_dict(self): """ Generates interactions to be displayed on the interactive HTML view. Interactions are colored - by the same rules as for PyMOL (`render.PublicationView()`). - """ - - """ - intn_dict = {'0_MET165.A': {'lig_at_x': '11.594', 'lig_at_y': '-1.029', 'lig_at_z': '22.265', 'prot_at_x': '12.038', 'prot_at_y': '1.138', 'prot_at_z': '19.397', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '1_GLU166.A': {'lig_at_x': '5.139', 'lig_at_y': '1.557', 'lig_at_z': '19.297', 'prot_at_x': '7.624', 'prot_at_y': '4.118', 'prot_at_z': '18.338', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '2_GLU166.A': {'lig_at_x': '9.020', 'lig_at_y': '1.593', 'lig_at_z': '21.267', 'prot_at_x': '9.672', 'prot_at_y': '2.793', 'prot_at_z': '18.548', 'type': 'hydrogen_bond', 'color': '#008000'}, '3_HIS41.A': {'lig_at_x': '12.093', 'lig_at_y': '0.095', 'lig_at_z': '22.863', 'prot_at_x': '11.997', 'prot_at_y': '-5.242', 'prot_at_z': '21.702', 'type': 'pi_stack', 'color': '#ffffff'}} - """ - + by the same rules as for PyMOL (`render.PublicationView()`), but a dict of interactions is + used which is generated in `render.PublicationView().pymol_add_interactions()`. + """ + intn_dict = {} + intn_count = 0 + get_contacts_mda(self.complex) + + for contact in get_contacts_mda(self.complex): + # first find the color. + contact_color = [ k for k,v in self.get_surface_coloring_dict().items() if contact[1].resid in v ] + + # check if the residue atom is in backbone, if so just overwrite the color to make the + # contact color green. + backbone_atoms = [ res for res in \ + biopython_to_mda(self.complex).select_atoms("protein and backbone") ] + if contact[1] in backbone_atoms: + contact_color = ["#047b14"] # green + + # then get the coordinates for the atoms involved; add to intn_dict + # while also prepending with an index so that we don't overwrite + # interactions purely keyed on residue alone (one res can have multiple + # interactions). + intn_dict[f"{intn_count}_{contact[1].resname}{contact[1].resid}"] = { + 'lig_at_x' : contact[0].position[0], + 'lig_at_y' : contact[0].position[1], + 'lig_at_z' : contact[0].position[2], + 'prot_at_x' : contact[1].position[0], + 'prot_at_y' : contact[1].position[1], + 'prot_at_z' : contact[1].position[2], + 'color' : contact_color[0] + } + intn_count += 1 + + return intn_dict def inject_stuff_in_template(self, sdf_str, pdb_str, surface_coloring, logoplot_dict, template="Template.html", out_file="out.html"): """" @@ -435,11 +464,9 @@ def inject_stuff_in_template(self, sdf_str, pdb_str, surface_coloring, logoplot_ line = line.replace("{{SURFACE_COLOR_INSERT}}", surface_coloring) # finally add interactions - - fout.write(line) - - - + if "{{INTN_DICT_INSERT}}" in line: + line = line.replace("{{INTN_DICT_INSERT}}", str(self.get_interaction_dict())) + fout.write(line) def render(self): @@ -452,15 +479,16 @@ def render(self): # Plus, this makes the multithreading easier to debug as there are fewer layers. logoplot_dict = self.get_logoplot_dict(confidence_lims) - # get the strings for the PDB (prot) and the SDF (lig, if present) + # # get the strings for the PDB (prot) and the SDF (lig, if present) lig_sdf_str, prot_pdb_str = split_pdb_str(self.complex_pdb_str) - # define the coloring of the surface + # # define the coloring of the surface surface_coloring = self.surface_coloring_dict_to_js(self.get_surface_coloring_dict()) - # define the interactions to show - pass - # # do a dirty HTML generation using the logoplot and fitness dicts. + # define the interactions to show contacts between ligand and protein + self.get_interaction_dict() + + # do a dirty HTML generation using the logoplot and fitness dicts. self.inject_stuff_in_template(lig_sdf_str, prot_pdb_str, surface_coloring, logoplot_dict) @@ -483,11 +511,11 @@ def render(self): from choppa.align.align import AlignFactory filled_aligned_fitness_dict = AlignFactory(fitness_dict, complex).align_fitness() - # PublicationView(filled_aligned_fitness_dict, - # complex, - # complex_rdkit, - # fitness_threshold=0.7).render() - + PublicationView(filled_aligned_fitness_dict, + complex, + complex_rdkit, + fitness_threshold=0.7).render() + InteractiveView(filled_aligned_fitness_dict, complex, complex_rdkit, diff --git a/choppa/render/utils.py b/choppa/render/utils.py index 7a3c6cc..899b6fd 100644 --- a/choppa/render/utils.py +++ b/choppa/render/utils.py @@ -1,9 +1,10 @@ import MDAnalysis from MDAnalysis.lib.util import NamedStream +from MDAnalysis.analysis import distances +from MDAnalysis.core.groups import AtomGroup import pymol2 -from rdkit import Chem +from Bio.PDB.PDBIO import PDBIO -import os from io import StringIO import warnings import tempfile @@ -25,6 +26,18 @@ def get_ligand_resnames_from_pdb_str(PDB_str, remove_solvent=True): resnames = set(ag.resnames) return list(resnames) +def biopython_to_mda(BP_complex): + """ + Converts a biopython protein object to an MDAnalysis one. + """ + with tempfile.TemporaryDirectory() as tmpdirname: + io=PDBIO() + io.set_structure(BP_complex) + io.save(f"{tmpdirname}/tmp_while_hmo_helps_write_to_stream.pdb") + + u = MDAnalysis.Universe(f"{tmpdirname}/tmp_while_hmo_helps_write_to_stream.pdb") + return u + def get_pdb_components(PDB_str, remove_solvent=True): """ Split a protein-ligand pdb into protein and ligand components @@ -132,7 +145,42 @@ def show_contacts( ) pymol_instance.cmd.set("dash_radius", "0.09", contacts_name) pymol_instance.cmd.set("dash_color", "green", contacts_name) - pymol_instance.cmd.hide("labels", contacts_name) + pymol_instance.cmd.hide("labels", contacts_name) return True +def get_contacts_mda( + complex, + bigcutoff=4.1, # for some reason MDA needs a little 0.1A nudge to agree with PyMOL measurements + remove_solvent=True, +): + """ + Use MDAnalysis to generate a dictionary of distance endpoint xyz coordinates between atoms in the ligand + and protein residues. + """ + contacts = [] + u = biopython_to_mda(complex) + if remove_solvent: + u = u.select_atoms("not (name H* or type OW)") + + lig = u.select_atoms("not protein") + prot = u.select_atoms("protein") + + distances_array = distances.distance_array(AtomGroup(lig), AtomGroup(prot)) + distances_array_within_cutoff = distances_array <= bigcutoff # makes the NxM array be boolean based on cutoff + + for contacted_lig_at, protein_sequence_distance_bool in zip(lig, distances_array_within_cutoff): + # for this atom in the ligand, find any protein atoms that are True (i.e. below cutoff) + contacted_res_ats = [prot[i] for i, contact in enumerate(protein_sequence_distance_bool) if contact] + + for contacted_res_at in contacted_res_ats: + # we only want to show HBD/HBA + if contacted_lig_at.type == "N" and contacted_res_at.type == "O" or \ + contacted_lig_at.type == "O" and contacted_res_at.type == "N": + contacts.append([contacted_lig_at, contacted_res_at]) + + return contacts + + + + \ No newline at end of file