# code_test.py
# ---Code test script only---
# This script contains many different code tests for app_molviz_v1.py & app_molviz_v2.py
# Each code block has a subtitle explaining what the block is for
# Main tests are for RDKit/Datamol functions and also some PIL/Pillow image manipulations
# For final working app.py, please refer to app_molviz_v2.py
# --Import libraries
# Import Pandas
import pandas as pd
import polars as pl
# Add RDKit
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import PandasTools
from rdkit.Chem.Draw import rdMolDraw2D, MolsToGridImage
#from rdkit.Chem.rdmolfiles import SmilesWriter, SmilesMolSupplier
# **IPythonConsole for Jupyter notebook environment only**
from rdkit.Chem.Draw import IPythonConsole
# Notebook rendering flag: False shows molecules as PNG images
IPythonConsole.ipython_useSVG=False
# Switch the flag to True instead to get SVG output
#IPythonConsole.ipython_useSVG=True
from IPython.display import SVG
import io
from PIL import Image
#from IPython.display import Image
from io import StringIO
import datamol as dm
# --Some code ideas:
# Reading from .smi file
# # Read in a simple list of SMILES from .smi file
# suppl = SmilesMolSupplier("cefe.smi")
# suppl
# # Convert a list of molecules into a dataframe
# mols = dm.to_df(suppl)
# mols
# # Generate a RDKit molecule column
# mols["mol"] = mols.smiles.apply(Chem.MolFromSmiles)
# # Show full dataframe - smiles & mol columns
# mols
# # Display first molecule in dataframe
# mols.iloc[0]["mol"]
# Function:
# def image(filename):
# # Read in a simple list of SMILES from .smi file
# suppl = SmilesMolSupplier(input.filename)
# # Convert a list of molecules into a dataframe
# mols = dm.to_df(suppl)
# # Generate a RDKit molecule column
# mols["mol"] = mols.smiles.apply(Chem.MolFromSmiles)
# # Display first molecule in dataframe
# image = mols.iloc[0]["mol"]
# return image
# --Code test:
# Load the compound table from the .csv file with Polars, convert it to a
# pandas DataFrame and keep working on a copy of the converted frame
df = pl.read_csv("df_ai.csv").to_pandas().copy()
# Quick checks used while testing: df.head(), type(df)
# Parse each SMILES string into an RDKit molecule and store it in a "mol" column
df["mol"] = df.Smiles.apply(Chem.MolFromSmiles)
# Bare expressions below only echo their value in an interactive/notebook session
df
# Pull the molecule column out on its own - this is a pandas Series
mols = df["mol"]
mols
type(mols)
#mols = list(mols)
# Plain Python list (no longer a pandas Series)
#type(mols)
# df.set_index(["Name"])
# df
# --Testing MolsToGridImage - gives IPython.core.display.Image object
# which means this will likely work in a Jupyter notebook environment only
# image = Draw.MolsToGridImage(mols, molsPerRow=4, returnPNG=True)
# image
# --Testing MolsToImage - saving molecules as PNG file & open PNG file directly
# **Shorter code for simple mols to image function only**
# img_test = Draw.MolsToImage(mols)
# img_test
# img_test.save("antiinf.png")
# Potentially replacing below code with the PyShiny example
# of using file path to open PNG image
# image_new = Image.open("anti-inf.png")
# image_new.show()
# --Testing MolToFile
# **Write a function to allow input of index number & file name to save 2 or more molecules in 1 file**
# MolToFile() can only save a single compound (specify index position) as PNG file
# Draft function v.1 - saving specified compound as PNG file:
# index = index position number of each compound
# file_name = name of PNG file to be saved
# def select_molecules(index, file_name):
# for mol in df["mol"]:
# image = Draw.MolToFile(mols[index], f"{file_name}.png")
# return image
# select_molecules(0, "test")
# select_molecules(1, "test1")
# img = Image.open(f"{file_name}.png")
# blank_image = Image.new("RGB", (600, 600))
# #Draw.MolToFile(mols[2], "anti.png")
# Draw.MolToFile(mols[0], "af1.png")
# Draw.MolToFile(mols[1], "af2.png")
# # --Using PIL/Pillow to manipulate images
# img1 = Image.open("af1.png")
# img2 = Image.open("af2.png")
# blank_image = Image.new("RGB", (600, 300))
# blank_image.paste(img1, (0, 0))
# blank_image.paste(img2, (300, 0))
# blank_image.save("merged.png")
# --RDKit Cairo molecule drawer - saving molecules as PNG image file
# **Longer code but with other functions e.g. saving PNG data as string and others**
# Compounds stacked on top of each other in PNG initially (change the frame size parameters)
# Saving 2D compound image as PNG - sample frame size: 500,180,200,180
# Code below:
# drawer = rdMolDraw2D.MolDraw2DCairo(2000,2000,300,300)
# drawer.drawOptions().useBWAtomPalette()
# drawer.DrawMolecules(mols)
# drawer.FinishDrawing()
# drawer.WriteDrawingText('anti-inf.png')
# Open the PNG file to show image
# image_test = Image.open("anti-inf.png")
# image_test.show()
# A slightly different version adding atom labels
# Better image resolution
# mol = Chem.MolFromSmiles('c1ccccc1O')
# d = rdMolDraw2D.MolDraw2DCairo(250, 200)
# d.drawOptions().addAtomIndices = True
# d.DrawMolecule(mol)
# d.FinishDrawing()
# with open('atom_annotation_1.png', 'wb') as f:
# f.write(d.GetDrawingText())
# --RDKit SVG molecule drawer - produces a long string of SVG data
# drawer = rdMolDraw2D.MolDraw2DSVG(2000,2000,300,300)
# drawer.drawOptions().useBWAtomPalette()
# drawer.DrawMolecules(mols)
# drawer.FinishDrawing()
# drawer.GetDrawingText()
# --Example of opening byte array data
# import io
# f = io.BytesIO(received_data)
# im = Image.open(f)
# --Trialled FrameToGridImage() - produces IPython.core.display.Image object
# rdkit.Chem.PandasTools.FrameToGridImage(frame, column='ROMol', legendsCol=None, **kwargs)
# df
# Chem.PandasTools.FrameToGridImage(df, column = "mol")
# Testing Lasso highlight with multiple substructures with PNG image
# Trialled & worked, likely better to stay in a notebook format
# Example from https://dev.to/dessygil/lasso-highlighting-in-datamol-36l3
# import datamol as dm
# target_molecule = "CO[C@@H](O)C1=C(O[C@H](F)Cl)C(C#N)=C1ONNC[NH3+]"
# substructure = ["CONN", "N#CC~CO"]
# dm.lasso_highlight_image(target_molecule, substructure, (400, 400), use_svg=False)
# Test "Highlight Molecule Differences" from RDKit:
# Best to stay in notebook format!
# from rdkit import Chem
# from rdkit.Chem import Draw
# from rdkit.Chem import rdFMCS
# from rdkit.Chem.Draw import rdDepictor
# rdDepictor.SetPreferCoordGen(True)
# mol1 = Chem.MolFromSmiles('FC1=CC=C2C(=C1)C=NN2')
# mol2 = Chem.MolFromSmiles('CCC1=C2NN=CC2=CC(Cl)=C1')
# def view_difference(mol1, mol2):
# mcs = rdFMCS.FindMCS([mol1,mol2])
# mcs_mol = Chem.MolFromSmarts(mcs.smartsString)
# match1 = mol1.GetSubstructMatch(mcs_mol)
# target_atm1 = []
# for atom in mol1.GetAtoms():
# if atom.GetIdx() not in match1:
# target_atm1.append(atom.GetIdx())
# match2 = mol2.GetSubstructMatch(mcs_mol)
# target_atm2 = []
# for atom in mol2.GetAtoms():
# if atom.GetIdx() not in match2:
# target_atm2.append(atom.GetIdx())
# # Passing a list of molecules works with MolsToGridImage(), not MolToImage() (which draws a single molecule)
# return Draw.MolToImage([mol1, mol2],highlightAtoms=[target_atm1, target_atm2])
# view_difference(mol1,mol2)
# **So far the best for Shiny for Python app to add molecule highlighting**
# Testing MolToImage() - with function for highlighting atoms & bonds!
# from matplotlib.colors import ColorConverter
# img = Draw.MolToImage(mols[1], highlightAtoms=[1,2,3], highlightBonds = [1,2], highlightColor=ColorConverter().to_rgb("aqua"))
# img.save("molecule.png")
# Below code used to trial different atom & bond number inputs:
# atomlist = list(range(1, 51))
# atomlist
# bondlist = list(range(1, 51))
# bondlist
# type(bondlist)
# Sample methods to convert string to integers
#list = list(map(int, a))
#test_list = list(map(int, test_list))
# Use list comprehension
# a = "1, 2, 3"
# numbers = [int(n) for n in a.split(",")]
# numbers
# Thinking of adding atom & bond indices to compound:
# Example code:
# d2d = rdMolDraw2D.MolDraw2DSVG(350,300)
# d2d.drawOptions().addAtomIndices=True
# d2d.DrawMolecule(diclofenac)
# d2d.FinishDrawing()
# SVG(d2d.GetDrawingText())
# Good image resolution in Jupyter notebook
# cpd = rdMolDraw2D.MolDraw2DSVG(350, 300)
# cpd.drawOptions().addAtomIndices = True
# cpd.DrawMolecule(mol1)
# cpd.FinishDrawing()
# SVG(cpd.GetDrawingText())
# A different method that works in Jupyter notebook with shorter code
# IPythonConsole.drawOptions.addAtomIndices = True
# IPythonConsole.molSize = 300,300
# mol1
# Potentially code below may work outside of Jupyter notebook environment:
# Labels atom indices for a single molecule
# mol1 = mols[0]
# for atom in mol1.GetAtoms():
# atom.SetProp('atomLabel',str(atom.GetIdx()+1))
# mol1
# Function for labelling atom indices for any single molecule inside a series of RDKit molecules
# def get_atom_index(i):
# mol = mols[i]
# for atom in mol.GetAtoms():
# # Decided not to add "+1" since native atom indexing starts at 0,
# # so the atom labels will start at 0 (= first atom)
# atom.SetProp('atomLabel', str(atom.GetIdx()))
# return mol
# get_atom_index(0)
# get_atom_index(1)
# get_atom_index(2)
# get_atom_index(4)