Skip to content

Commit

Permalink
got logoplots to work for HTML generation, now need to do ints and su…
Browse files Browse the repository at this point in the history
…rface
  • Loading branch information
JenkeScheen committed Apr 16, 2024
1 parent 9fdd1e8 commit ca68a13
Show file tree
Hide file tree
Showing 9 changed files with 16,127 additions and 11 deletions.
Binary file added .DS_Store
Binary file not shown.
Binary file modified choppa/.DS_Store
Binary file not shown.
Binary file added choppa/render/.DS_Store
Binary file not shown.
236 changes: 236 additions & 0 deletions choppa/render/Template.html

Large diffs are not rendered by default.

15 changes: 9 additions & 6 deletions choppa/render/logoplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,20 @@ def divide_fitness_types(self):

return {wildtype: wildtype_fitness}, unfit_mutants, fit_mutants

def render_logoplot(self, mutants, global_min_confidence=False, global_max_confidence=False, lhs=True):
def render_logoplot(self, mutants, global_min_confidence=False, global_max_confidence=False, lhs=True, wildtype=False):
"""
Creates a logoplot as a base64 string. Also annotates with confidence values if present.
TODO: nicer rounded ticks agnostic to array limits
"""
if len(mutants) == 0:
# this can happen when there are no mutants in this category. Return an empty base64 instead.
return ""
# this can happen when there are no mutants in this category. Return an empty white-square base64 instead.
return "iVBORw0KGgoAAAANSUhEUgAAAJYAAACfCAIAAACUbLd9AAAACXBIWXMAAAsTAAALEwEAmpwYAAABhElEQVR4nO3RwQkAIBDAMHX/nc8hfEghmaDQPTOLsvM7gFcW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5lmYZ2GehXkW5l1WGwQ7i50I0AAAAABJRU5ErkJggg=="
plt.switch_backend('Agg') # prevents plt from opening a figure on OS
_, ax = plt.subplots(figsize=(3, 10))
if wildtype: # we want this to be a bit smaller and square because it'll always have 1 residue.
_, ax = plt.subplots(figsize=(4, 4))
else:
_, ax = plt.subplots(figsize=(3, 10))

# if there are confidences, we will color the logoplot AA letters by confidence and
# show a color bar if this is the left-hand-side logoplot.
Expand Down Expand Up @@ -147,9 +150,9 @@ def build_logoplot(self, global_min_confidence=False, global_max_confidence=Fals

# generate the logoplot base64 for wildtype (LHS, top), fit (LHS, bottom) and unfit (RHS; with colorbar)
wildtype_base64 = self.render_logoplot(wildtype, global_min_confidence=global_min_confidence,
global_max_confidence=global_max_confidence, lhs=True)
global_max_confidence=global_max_confidence, wildtype=True)
fit_base64 = self.render_logoplot(fit_mutants, global_min_confidence=global_min_confidence,
global_max_confidence=global_max_confidence, lhs=True)
global_max_confidence=global_max_confidence)
unfit_base64 = self.render_logoplot(unfit_mutants, global_min_confidence=global_min_confidence,
global_max_confidence=global_max_confidence, lhs=False)

Expand Down
4,248 changes: 4,248 additions & 0 deletions choppa/render/out.html

Large diffs are not rendered by default.

11,502 changes: 11,502 additions & 0 deletions choppa/render/pose.html

Large diffs are not rendered by default.

69 changes: 65 additions & 4 deletions choppa/render/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

from choppa.render.utils import show_contacts, get_ligand_resnames_from_pdb_str
from choppa.render.utils import show_contacts, get_ligand_resnames_from_pdb_str, split_pdb_str
from choppa.render.logoplots import LogoPlot
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger()
Expand Down Expand Up @@ -316,7 +316,66 @@ def get_logoplot_dict(self, confidence_lims, multiprocess=False):
}}

return logoplot_dict


# also add in interactions dict


def inject_stuff_in_template(self, sdf_str, pdb_str, logoplot_dict, template="Template.html", out_file="out.html"):
    """
    Fill a template HTML file with structure and logoplot data and write the result.

    Each line of `template` is copied to `out_file` with these placeholders replaced:

    - ``{{PDB_INSERT}}``        -> `pdb_str` (protein)
    - ``{{SDF_INSERT}}``        -> `sdf_str` (ligand)
    - ``{{LOGOPLOTS_INSERTS}}`` -> one hidden ``<div>`` per logoplot (wildtype, fit, unfit)
      for every entry in `logoplot_dict`

    TODO: HMO to replace this crude replacement code with `jinja`.

    :param sdf_str: text substituted for ``{{SDF_INSERT}}``.
    :param pdb_str: text substituted for ``{{PDB_INSERT}}``.
    :param logoplot_dict: mapping whose values are dicts holding ``'fitness_aligned_index'``
        and ``'logoplots_base64'``; the latter maps ``'wildtype'``, ``'fit'`` and ``'unfit'``
        to a base64-encoded PNG (``bytes`` or ``str``).
    :param template: path of the template HTML to read.
    :param out_file: path of the HTML file to write.
    :return: None
    """
    # (CSS class, DIV id prefix, image alt text, key in `logoplots_base64`) per logoplot flavour.
    # The three DIVs differ only in these values, so they are generated from one table.
    logoplot_kinds = (
        ("logoplotbox_wt", "wtDIV", "wt residue logoplot", "wildtype"),
        ("logoplotbox_fit", "fitDIV", "fit residue logoplot", "fit"),
        ("logoplotbox_unfit", "unfitDIV", "unfit residue logoplot", "unfit"),
    )

    div_chunks = []
    for logoplot_data in logoplot_dict.values():
        aligned_index = logoplot_data["fitness_aligned_index"]
        for css_class, id_prefix, description, key in logoplot_kinds:
            raw = logoplot_data["logoplots_base64"][key]
            if isinstance(raw, bytes):
                # decode directly instead of scrubbing the "b'...'" repr that str(bytes) produces
                base64_str = raw.decode("ascii")
            else:
                # keep the legacy cleanup so already-stringified bytes ("b'...'") still work
                base64_str = str(raw).replace("b'", "").replace("'", "")
            # NB: single-quoted Python strings so the HTML attributes can use double quotes
            div_chunks.append(
                f'<div class="{css_class}" id="{id_prefix}_{aligned_index}" style="display:none">\n'
                f' <img alt="{description}" src="data:image/png;base64,{base64_str}" />\n'
                '</div>\n'
            )
    logoplot_divs = "".join(div_chunks)

    # stream the template line by line; str.replace is a no-op on lines without a placeholder
    with open(template, "rt") as fin, open(out_file, "wt") as fout:
        for line in fin:
            line = line.replace("{{PDB_INSERT}}", f"{pdb_str}")
            line = line.replace("{{SDF_INSERT}}", f"{sdf_str}")
            line = line.replace("{{LOGOPLOTS_INSERTS}}", logoplot_divs)
            # TODO: add in interactions
            fout.write(line)

# then add interactions

# then add surface coloring





def render(self):
Expand All @@ -330,8 +389,10 @@ def render(self):
logoplot_dict = self.get_logoplot_dict(confidence_lims)

# get the strings for the PDB (prot) and the SDF (lig, if present)
lig_sdf_str, prot_pdb_str = split_pdb_str(self.complex_pdb_str)

# do a dirty HTML generation using the logoplot and fitness dicts.
self.inject_stuff_in_template(lig_sdf_str, prot_pdb_str, logoplot_dict)



Expand All @@ -342,8 +403,8 @@ def render(self):

from choppa.IO.input import FitnessFactory, ComplexFactory

fitness_dict = FitnessFactory(TOY_FITNESS_DATA_SECTIONED,
# confidence_colname="confidence"
fitness_dict = FitnessFactory(TOY_FITNESS_DATA_COMPLETE,
confidence_colname="confidence"
).get_fitness_basedict()
complex = ComplexFactory(TOY_COMPLEX).load_pdb()
complex_rdkit = ComplexFactory(TOY_COMPLEX).load_pdb_rdkit()
Expand Down
68 changes: 67 additions & 1 deletion choppa/render/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import MDAnalysis
from MDAnalysis.lib.util import NamedStream
import pymol2
from rdkit import Chem

import os
from io import StringIO
import warnings
import tempfile

def get_ligand_resnames_from_pdb_str(PDB_str, remove_solvent=True):
"""
Expand All @@ -20,6 +25,66 @@ def get_ligand_resnames_from_pdb_str(PDB_str, remove_solvent=True):
resnames = set(ag.resnames)
return list(resnames)

def get_pdb_components(PDB_str, remove_solvent=True):
    """
    Split a protein-ligand PDB string into ligand and protein atom selections.

    :param PDB_str: contents of a PDB file as a single string.
    :param remove_solvent: kept for interface compatibility; it currently has no effect
        on the returned selections (see the NOTE in the body).
    :return: tuple ``(ligand, protein)`` of MDAnalysis selections, where ``protein`` is
        the ``"protein"`` selection and ``ligand`` is everything else (``"not protein"``).
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # hides MDA RunTimeWarning that complains about string IO
        u = MDAnalysis.Universe(NamedStream(StringIO(PDB_str), "complex.pdb"))

    # NOTE(review): the previous code built `u.select_atoms("not (name H* or type OW)")`
    # when `remove_solvent` was set but never used the result, so solvent was never
    # removed. That dead selection is dropped here; the return values are unchanged.
    # TODO: decide whether solvent should really be excluded from the ligand selection.
    protein = u.select_atoms("protein")
    ligand = u.select_atoms("not protein")

    return ligand, protein


def process_ligand(ligand):
    """
    Convert a ligand atom selection into SDF text, letting PyMOL guess the bonds.

    1. write the selection to a temporary PDB file
    2. load that PDB into a private PyMOL session (PyMOL does the bond guessing)
    3. save all states as SDF and return the contents of that file

    :param ligand: MDAnalysis atom selection holding the non-protein atoms.
    :return: SDF-formatted string; an empty string if the selection contains no atoms.
    """
    if len(ligand) == 0:
        # no ligand present (apo structure): nothing to write, and writing an empty
        # selection would fail before PyMOL is even started.
        return ""

    with tempfile.TemporaryDirectory() as tmpdirname:
        pdb_path = os.path.join(tmpdirname, "lig_tmp_while_hmo_helps_write_to_stream.pdb")
        sdf_path = os.path.join(tmpdirname, "lig_tmp_while_hmo_helps_write_to_stream.sdf")
        ligand.write(pdb_path)

        p = pymol2.PyMOL()
        p.start()
        try:
            p.cmd.load(pdb_path)
            p.cmd.save(sdf_path, "all", 0)  # writes all states, so should be able to handle multi-ligand
        finally:
            # always shut the PyMOL session down, even if load/save raised
            p.stop()

        with open(sdf_path, "r") as f:
            return f.read()

def process_protein(protein):
    """
    Serialise the protein selection of an MDA universe to PDB text.

    The selection is written to a PDB file inside a throwaway temporary directory and
    the contents of that file are read back and returned as a string.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        pdb_path = f"{scratch_dir}/prot_tmp_while_hmo_helps_write_to_stream.pdb"
        protein.write(pdb_path)
        with open(pdb_path, "r") as handle:
            pdb_text = handle.read()
    return pdb_text

def split_pdb_str(PDB_str):
    """
    Break a complex PDB string into a ligand SDF string and a protein PDB string.

    The ligand part (if present) is converted to SDF with guessed bond orders; the
    protein part is returned as PDB text. The return order is ``(ligand, protein)``.
    Inspired by https://gist.github.com/PatWalters/c046fee2760e6894ed13e19b8c99193b
    TODO: set below functions through NamedStream instead of tmpdir
    """
    ligand_atoms, protein_atoms = get_pdb_components(PDB_str)
    ligand_sdf = process_ligand(ligand_atoms)
    protein_pdb = process_protein(protein_atoms)
    return ligand_sdf, protein_pdb

def show_contacts(
pymol_instance,
selection_residues,
Expand Down Expand Up @@ -69,4 +134,5 @@ def show_contacts(
pymol_instance.cmd.set("dash_color", "green", contacts_name)
pymol_instance.cmd.hide("labels", contacts_name)

return True
return True

0 comments on commit ca68a13

Please sign in to comment.