Skip to content

Commit

Permalink
choppa MVP
Browse files Browse the repository at this point in the history
  • Loading branch information
JenkeScheen committed Apr 17, 2024
1 parent 23a7d4b commit ed66a2c
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 42 deletions.
Binary file modified choppa/render/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion choppa/render/Template.html
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@
viewer.setHoverDuration(100); // makes resn popup instant on hover

//////////////// add protein-ligand interactions
var intn_dict = {'0_MET165.A': {'lig_at_x': '11.594', 'lig_at_y': '-1.029', 'lig_at_z': '22.265', 'prot_at_x': '12.038', 'prot_at_y': '1.138', 'prot_at_z': '19.397', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '1_GLU166.A': {'lig_at_x': '5.139', 'lig_at_y': '1.557', 'lig_at_z': '19.297', 'prot_at_x': '7.624', 'prot_at_y': '4.118', 'prot_at_z': '18.338', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '2_GLU166.A': {'lig_at_x': '9.020', 'lig_at_y': '1.593', 'lig_at_z': '21.267', 'prot_at_x': '9.672', 'prot_at_y': '2.793', 'prot_at_z': '18.548', 'type': 'hydrogen_bond', 'color': '#008000'}, '3_HIS41.A': {'lig_at_x': '12.093', 'lig_at_y': '0.095', 'lig_at_z': '22.863', 'prot_at_x': '11.997', 'prot_at_y': '-5.242', 'prot_at_z': '21.702', 'type': 'pi_stack', 'color': '#ffffff'}}
var intn_dict = {{INTN_DICT_INSERT}}
for (const [_, intn] of Object.entries(intn_dict)) {
viewer.addCylinder({start:{x:parseFloat(intn["lig_at_x"]),y:parseFloat(intn["lig_at_y"]),z:parseFloat(intn["lig_at_z"])},
end:{x:parseFloat(intn["prot_at_x"]),y:parseFloat(intn["prot_at_y"]),z:parseFloat(intn["prot_at_z"])},
Expand Down
2 changes: 1 addition & 1 deletion choppa/render/out.html
Original file line number Diff line number Diff line change
Expand Up @@ -4228,7 +4228,7 @@
viewer.setHoverDuration(100); // makes resn popup instant on hover

//////////////// add protein-ligand interactions
var intn_dict = {'0_MET165.A': {'lig_at_x': '11.594', 'lig_at_y': '-1.029', 'lig_at_z': '22.265', 'prot_at_x': '12.038', 'prot_at_y': '1.138', 'prot_at_z': '19.397', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '1_GLU166.A': {'lig_at_x': '5.139', 'lig_at_y': '1.557', 'lig_at_z': '19.297', 'prot_at_x': '7.624', 'prot_at_y': '4.118', 'prot_at_z': '18.338', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '2_GLU166.A': {'lig_at_x': '9.020', 'lig_at_y': '1.593', 'lig_at_z': '21.267', 'prot_at_x': '9.672', 'prot_at_y': '2.793', 'prot_at_z': '18.548', 'type': 'hydrogen_bond', 'color': '#008000'}, '3_HIS41.A': {'lig_at_x': '12.093', 'lig_at_y': '0.095', 'lig_at_z': '22.863', 'prot_at_x': '11.997', 'prot_at_y': '-5.242', 'prot_at_z': '21.702', 'type': 'pi_stack', 'color': '#ffffff'}}
var intn_dict = {'0_GLY130': {'lig_at_x': 10.401, 'lig_at_y': 2.386, 'lig_at_z': 24.891, 'prot_at_x': 11.877, 'prot_at_y': 6.039, 'prot_at_z': 25.324, 'color': '#047b14'}, '1_ILE131': {'lig_at_x': 10.401, 'lig_at_y': 2.386, 'lig_at_z': 24.891, 'prot_at_x': 11.114, 'prot_at_y': 4.034, 'prot_at_z': 28.164, 'color': '#047b14'}, '2_GLY48': {'lig_at_x': 9.463, 'lig_at_y': 0.36, 'lig_at_z': 24.84, 'prot_at_x': 9.407, 'prot_at_y': -2.604, 'prot_at_z': 27.335, 'color': '#047b14'}, '3_VAL49': {'lig_at_x': 9.463, 'lig_at_y': 0.36, 'lig_at_z': 24.84, 'prot_at_x': 11.09, 'prot_at_y': -2.286, 'prot_at_z': 24.165, 'color': '#047b14'}, '4_ALA21': {'lig_at_x': 8.339, 'lig_at_y': -0.924, 'lig_at_z': 18.765, 'prot_at_x': 7.401, 'prot_at_y': 0.87, 'prot_at_z': 15.232, 'color': '#047b14'}, '5_ASP22': {'lig_at_x': 7.203, 'lig_at_y': -2.833, 'lig_at_z': 19.718, 'prot_at_x': 6.308, 'prot_at_y': -4.149, 'prot_at_z': 17.236, 'color': '#ffffff'}}
for (const [_, intn] of Object.entries(intn_dict)) {
viewer.addCylinder({start:{x:parseFloat(intn["lig_at_x"]),y:parseFloat(intn["lig_at_y"]),z:parseFloat(intn["lig_at_z"])},
end:{x:parseFloat(intn["prot_at_x"]),y:parseFloat(intn["prot_at_y"]),z:parseFloat(intn["prot_at_z"])},
Expand Down
102 changes: 65 additions & 37 deletions choppa/render/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,23 @@
import math
from tqdm import tqdm

from choppa.render.utils import show_contacts, get_ligand_resnames_from_pdb_str, split_pdb_str
from choppa.render.utils import show_contacts, get_ligand_resnames_from_pdb_str, split_pdb_str, get_contacts_mda, biopython_to_mda
from choppa.render.logoplots import LogoPlot, WHITE_EMPTY_SQUARE, render_singleres_logoplot

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger()

HEX_COLOR_CODES = [
"#ffffff",
"#ff9e83",
"#ff8a6c",
"#ff7454",
"#ff5c3d",
"#ff3f25",
"#ff0707",
"#642df0",
]

class PublicationView():
"""
Uses the PyMOL API to create a session file for publication-ready views of the fitness data on
Expand Down Expand Up @@ -117,6 +128,7 @@ def pymol_color_by_fitness(self, p):

# first get the fitness degree per residue and what colors to make them
residue_mutability_levels, mutability_color_dict = self.pymol_color_coder()
self.residue_mutability_levels = residue_mutability_levels

# now select the residues, name them and color them.
for fitness_degree, residues in residue_mutability_levels.items():
Expand Down Expand Up @@ -247,7 +259,7 @@ def __init__(self, filled_aligned_fitness_dict,
self.fitness_dict = filled_aligned_fitness_dict
self.complex = complex
self.fitness_threshold = fitness_threshold
self.output_session_file = output_session_file
self.output_session_file = output_session_file

# get the PDB file as a string from RDKit
self.complex_pdb_str = Chem.MolToPDBBlock(complex_rdkit)
Expand Down Expand Up @@ -320,27 +332,17 @@ def get_surface_coloring_dict(self):
"""
Based on fitness coloring, creates a dict where keys are colors, values are residue numbers.
"""
hex_color_codes = [
"#ffffff",
"#ff9e83",
"#ff8a6c",
"#ff7454",
"#ff5c3d",
"#ff3f25",
"#ff0707",
"#642df0",
]
color_res_dict = { color:[] for color in hex_color_codes }
color_res_dict = { color:[] for color in HEX_COLOR_CODES }
for resindex, fitness_data in self.fitness_dict.items():
if not 'mutants' in fitness_data:
# no fitness data for this residue after alignment between Fitness CSV and input PDB
color_res_dict["#642df0"].append(resindex) # makes residue surface blue
continue
fit_mutants = [mut for mut in fitness_data['mutants'] if mut['fitness'] > self.fitness_threshold]
if len(fit_mutants) <= 6: # color by increasing redness the more fit mutants there are
color_res_dict[hex_color_codes[len(fit_mutants)]].append(resindex)
color_res_dict[HEX_COLOR_CODES[len(fit_mutants)]].append(resindex)
else: # if there are more than 5 fit mutants just color it the most red - super mutable in any case
color_res_dict[hex_color_codes[6]].append(resindex)
color_res_dict[HEX_COLOR_CODES[6]].append(resindex)

return color_res_dict

Expand Down Expand Up @@ -377,13 +379,40 @@ def surface_coloring_dict_to_js(self, color_res_dict):
def get_interaction_dict(self):
"""
Generates interactions to be displayed on the interactive HTML view. Interactions are colored
by the same rules as for PyMOL (`render.PublicationView()`).
"""

"""
intn_dict = {'0_MET165.A': {'lig_at_x': '11.594', 'lig_at_y': '-1.029', 'lig_at_z': '22.265', 'prot_at_x': '12.038', 'prot_at_y': '1.138', 'prot_at_z': '19.397', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '1_GLU166.A': {'lig_at_x': '5.139', 'lig_at_y': '1.557', 'lig_at_z': '19.297', 'prot_at_x': '7.624', 'prot_at_y': '4.118', 'prot_at_z': '18.338', 'type': 'hydrophobic_interaction', 'color': '#ffffff'}, '2_GLU166.A': {'lig_at_x': '9.020', 'lig_at_y': '1.593', 'lig_at_z': '21.267', 'prot_at_x': '9.672', 'prot_at_y': '2.793', 'prot_at_z': '18.548', 'type': 'hydrogen_bond', 'color': '#008000'}, '3_HIS41.A': {'lig_at_x': '12.093', 'lig_at_y': '0.095', 'lig_at_z': '22.863', 'prot_at_x': '11.997', 'prot_at_y': '-5.242', 'prot_at_z': '21.702', 'type': 'pi_stack', 'color': '#ffffff'}}
"""

by the same rules as for PyMOL (`render.PublicationView()`), but a dict of interactions is
used which is generated in `render.PublicationView().pymol_add_interactions()`.
"""
intn_dict = {}
intn_count = 0
get_contacts_mda(self.complex)

for contact in get_contacts_mda(self.complex):
# first find the color.
contact_color = [ k for k,v in self.get_surface_coloring_dict().items() if contact[1].resid in v ]

# check if the residue atom is in backbone, if so just overwrite the color to make the
# contact color green.
backbone_atoms = [ res for res in \
biopython_to_mda(self.complex).select_atoms("protein and backbone") ]
if contact[1] in backbone_atoms:
contact_color = ["#047b14"] # green

# then get the coordinates for the atoms involved; add to intn_dict
# while also prepending with an index so that we don't overwrite
# interactions purely keyed on residue alone (one res can have multiple
# interactions).
intn_dict[f"{intn_count}_{contact[1].resname}{contact[1].resid}"] = {
'lig_at_x' : contact[0].position[0],
'lig_at_y' : contact[0].position[1],
'lig_at_z' : contact[0].position[2],
'prot_at_x' : contact[1].position[0],
'prot_at_y' : contact[1].position[1],
'prot_at_z' : contact[1].position[2],
'color' : contact_color[0]
}
intn_count += 1

return intn_dict

def inject_stuff_in_template(self, sdf_str, pdb_str, surface_coloring, logoplot_dict, template="Template.html", out_file="out.html"):
""""
Expand Down Expand Up @@ -435,11 +464,9 @@ def inject_stuff_in_template(self, sdf_str, pdb_str, surface_coloring, logoplot_
line = line.replace("{{SURFACE_COLOR_INSERT}}", surface_coloring)

# finally add interactions

fout.write(line)



if "{{INTN_DICT_INSERT}}" in line:
line = line.replace("{{INTN_DICT_INSERT}}", str(self.get_interaction_dict()))
fout.write(line)


def render(self):
Expand All @@ -452,15 +479,16 @@ def render(self):
# Plus, this makes the multithreading easier to debug as there are fewer layers.
logoplot_dict = self.get_logoplot_dict(confidence_lims)

# get the strings for the PDB (prot) and the SDF (lig, if present)
# # get the strings for the PDB (prot) and the SDF (lig, if present)
lig_sdf_str, prot_pdb_str = split_pdb_str(self.complex_pdb_str)

# define the coloring of the surface
# # define the coloring of the surface
surface_coloring = self.surface_coloring_dict_to_js(self.get_surface_coloring_dict())
# define the interactions to show
pass

# # do a dirty HTML generation using the logoplot and fitness dicts.
# define the interactions to show contacts between ligand and protein
self.get_interaction_dict()

# do a dirty HTML generation using the logoplot and fitness dicts.
self.inject_stuff_in_template(lig_sdf_str, prot_pdb_str, surface_coloring, logoplot_dict)


Expand All @@ -483,11 +511,11 @@ def render(self):
from choppa.align.align import AlignFactory
filled_aligned_fitness_dict = AlignFactory(fitness_dict, complex).align_fitness()

# PublicationView(filled_aligned_fitness_dict,
# complex,
# complex_rdkit,
# fitness_threshold=0.7).render()

PublicationView(filled_aligned_fitness_dict,
complex,
complex_rdkit,
fitness_threshold=0.7).render()
InteractiveView(filled_aligned_fitness_dict,
complex,
complex_rdkit,
Expand Down
54 changes: 51 additions & 3 deletions choppa/render/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import MDAnalysis
from MDAnalysis.lib.util import NamedStream
from MDAnalysis.analysis import distances
from MDAnalysis.core.groups import AtomGroup
import pymol2
from rdkit import Chem
from Bio.PDB.PDBIO import PDBIO

import os
from io import StringIO
import warnings
import tempfile
Expand All @@ -25,6 +26,18 @@ def get_ligand_resnames_from_pdb_str(PDB_str, remove_solvent=True):
resnames = set(ag.resnames)
return list(resnames)

def biopython_to_mda(BP_complex):
"""
Converts a biopython protein object to an MDAnalysis one.
"""
with tempfile.TemporaryDirectory() as tmpdirname:
io=PDBIO()
io.set_structure(BP_complex)
io.save(f"{tmpdirname}/tmp_while_hmo_helps_write_to_stream.pdb")

u = MDAnalysis.Universe(f"{tmpdirname}/tmp_while_hmo_helps_write_to_stream.pdb")
return u

def get_pdb_components(PDB_str, remove_solvent=True):
"""
Split a protein-ligand pdb into protein and ligand components
Expand Down Expand Up @@ -132,7 +145,42 @@ def show_contacts(
)
pymol_instance.cmd.set("dash_radius", "0.09", contacts_name)
pymol_instance.cmd.set("dash_color", "green", contacts_name)
pymol_instance.cmd.hide("labels", contacts_name)
pymol_instance.cmd.hide("labels", contacts_name)

return True

def get_contacts_mda(
complex,
bigcutoff=4.1, # for some reason MDA needs a little 0.1A nudge to agree with PyMOL measurements
remove_solvent=True,
):
"""
Use MDAnalysis to generate a dictionary of distance endpoint xyz coordinates between atoms in the ligand
and protein residues.
"""
contacts = []
u = biopython_to_mda(complex)
if remove_solvent:
u = u.select_atoms("not (name H* or type OW)")

lig = u.select_atoms("not protein")
prot = u.select_atoms("protein")

distances_array = distances.distance_array(AtomGroup(lig), AtomGroup(prot))
distances_array_within_cutoff = distances_array <= bigcutoff # makes the NxM array be boolean based on cutoff

for contacted_lig_at, protein_sequence_distance_bool in zip(lig, distances_array_within_cutoff):
# for this atom in the ligand, find any protein atoms that are True (i.e. below cutoff)
contacted_res_ats = [prot[i] for i, contact in enumerate(protein_sequence_distance_bool) if contact]

for contacted_res_at in contacted_res_ats:
# we only want to show HBD/HBA
if contacted_lig_at.type == "N" and contacted_res_at.type == "O" or \
contacted_lig_at.type == "O" and contacted_res_at.type == "N":
contacts.append([contacted_lig_at, contacted_res_at])

return contacts




0 comments on commit ed66a2c

Please sign in to comment.