From 737bcfe371481de7d1e60cb778516dcb6a6fcb15 Mon Sep 17 00:00:00 2001 From: Jenke Scheen Date: Wed, 25 Sep 2024 07:14:02 +0200 Subject: [PATCH 1/4] rephrase --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index fb56a1b..e583ff9 100644 --- a/app.py +++ b/app.py @@ -124,7 +124,7 @@ def convert_df(df): else: st.stop() # Select a column from the DataFrame - column = st.selectbox("Select a column of SMILES analyze", df.columns) + column = st.selectbox("Select a SMILES column", df.columns) multismiles = True smiles_column = df[column] From bc4df65921d1d2e0a286c6ae7daa18fc671331f8 Mon Sep 17 00:00:00 2001 From: Jenke Scheen Date: Wed, 25 Sep 2024 07:26:11 +0200 Subject: [PATCH 2/4] generalize some variable names --- app.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/app.py b/app.py index e583ff9..d106e18 100644 --- a/app.py +++ b/app.py @@ -99,8 +99,8 @@ def convert_df(df): st.stop() smiles = [smiles] df = pd.DataFrame(smiles, columns=["SMILES"]) - column = "SMILES" - smiles_column = df["SMILES"] + smiles_column_name = "SMILES" + smiles_column = df[smiles_column_name] elif input == "Enter SMILES": smiles = st.text_input("Enter a SMILES string") if _is_valid_smiles(smiles): @@ -110,8 +110,8 @@ def convert_df(df): st.stop() smiles = [smiles] df = pd.DataFrame(smiles, columns=["SMILES"]) - column = "SMILES" - smiles_column = df["SMILES"] + smiles_column_name = "SMILES" + smiles_column = df[smiles_column_name] elif input == "Upload a CSV file": # Create a file uploader for CSV files uploaded_file = st.file_uploader( @@ -124,9 +124,9 @@ def convert_df(df): else: st.stop() # Select a column from the DataFrame - column = st.selectbox("Select a SMILES column", df.columns) + smiles_column_name = st.selectbox("Select a SMILES column", df.columns) multismiles = True - smiles_column = df[column] + smiles_column = df[smiles_column_name] # check if the smiles are valid valid_smiles = [_is_valid_smiles(smi) for smi in smiles_column] @@ -163,8 +163,8 @@ def convert_df(df): f"All molecule entries are valid (n={len(df)}), proceeding with prediction", icon="✅", ) - column = "SMILES" - smiles_column = df["SMILES"] + smiles_column_name = "SMILES" + smiles_column = df[smiles_column_name] multismiles = True st.markdown("## Model parameters :nut_and_bolt:") @@ -264,7 +264,9 @@ def convert_df(df): import seaborn as sns # then a scatterplot of uncertainty vs MW - df["MW"] = [MolWt(Chem.MolFromSmiles(smi)) for smi in sorted_df["SMILES"]] + df["MW"] = [ + MolWt(Chem.MolFromSmiles(smi)) for smi in sorted_df[smiles_column_name] + ] fig, ax = plt.subplots() ax = sns.scatterplot( From dccc4dae13b57b377afdf44418673130fc9a8c87 Mon Sep 17 00:00:00 2001 From: Jenke Scheen Date: Wed, 25 Sep 2024 07:41:39 +0200 Subject: [PATCH 3/4] more generalization --- app.py | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/app.py b/app.py index d106e18..bc0ff75 100644 --- a/app.py +++ b/app.py @@ -59,7 +59,7 @@ def sdf_str_to_rdkit_mol(sdf): @st.cache_data def convert_df(df): # IMPORTANT: Cache the conversion to prevent computation on every rerun - return df.to_csv().encode("utf-8") + return queried_df.to_csv().encode("utf-8") # Set the title of the Streamlit app @@ -98,9 +98,9 @@ def convert_df(df): st.error("Invalid molecule", icon="🚨") st.stop() smiles = [smiles] - df = pd.DataFrame(smiles, columns=["SMILES"]) + queried_df = pd.DataFrame(smiles, columns=["SMILES"]) smiles_column_name = "SMILES" - smiles_column = df[smiles_column_name] + smiles_column = queried_df[smiles_column_name] elif input == "Enter SMILES": smiles = st.text_input("Enter a SMILES string") if _is_valid_smiles(smiles): @@ -109,9 +109,9 @@ def convert_df(df): st.error("Invalid SMILES string", icon="🚨") st.stop() smiles = [smiles] - df = pd.DataFrame(smiles, columns=["SMILES"]) + queried_df = pd.DataFrame(smiles, columns=["SMILES"]) smiles_column_name = "SMILES" - smiles_column = df[smiles_column_name] + smiles_column = queried_df[smiles_column_name] elif input == "Upload a CSV file": # Create a file uploader for CSV files uploaded_file = st.file_uploader( @@ -120,13 +120,13 @@ def convert_df(df): # If a file is uploaded, parse it into a DataFrame if uploaded_file is not None: - df = pd.read_csv(uploaded_file) + queried_df = pd.read_csv(uploaded_file) else: st.stop() # Select a column from the DataFrame - smiles_column_name = st.selectbox("Select a SMILES column", df.columns) + smiles_column_name = st.selectbox("Select a SMILES column", queried_df.columns) multismiles = True - smiles_column = df[smiles_column_name] + smiles_column = queried_df[smiles_column_name] # check if the smiles are valid valid_smiles = [_is_valid_smiles(smi) for smi in smiles_column] @@ -153,18 +153,18 @@ def convert_df(df): string_data = stringio.read() mols = sdf_str_to_rdkit_mol(string_data) smiles = [Chem.MolToSmiles(m) for m in mols] - df = pd.DataFrame(smiles, columns=["SMILES"]) + queried_df = pd.DataFrame(smiles, columns=["SMILES"]) # st.error("Error reading the SDF file, please check the input", icon="🚨") # st.stop() else: st.stop() st.success( - f"All molecule entries are valid (n={len(df)}), proceeding with prediction", + f"All molecule entries are valid (n={len(queried_df)}), proceeding with prediction", icon="✅", ) smiles_column_name = "SMILES" - smiles_column = df[smiles_column_name] + smiles_column = queried_df[smiles_column_name] multismiles = True st.markdown("## Model parameters :nut_and_bolt:") @@ -224,8 +224,8 @@ def convert_df(df): pred_column_name = f"{_target_str}_computed-{endpoint_value}" unc_column_name = f"{_target_str}_computed-{endpoint_value}_uncertainty" -df[pred_column_name] = preds -df[unc_column_name] = err +queried_df[pred_column_name] = preds +queried_df[unc_column_name] = err st.markdown("---") if multismiles: @@ -233,7 +233,7 @@ def convert_df(df): # Histogram first fig, ax = plt.subplots() - sorted_df = df.sort_values(by=pred_column_name) + sorted_df = queried_df.sort_values(by=pred_column_name) n_bins = int(len(sorted_df[pred_column_name]) / 10) if n_bins < 5: # makes the histogram slightly more interpretable with low data n_bins = 5 @@ -264,16 +264,22 @@ def convert_df(df): import seaborn as sns # then a scatterplot of uncertainty vs MW - df["MW"] = [ + queried_df["MW"] = [ MolWt(Chem.MolFromSmiles(smi)) for smi in sorted_df[smiles_column_name] ] fig, ax = plt.subplots() ax = sns.scatterplot( - x="MW", y=pred_column_name, hue=unc_column_name, palette="coolwarm", data=df + x="MW", + y=pred_column_name, + hue=unc_column_name, + palette="coolwarm", + data=queried_df, ) - norm = plt.Normalize(df[unc_column_name].min(), df[unc_column_name].max()) + norm = plt.Normalize( + queried_df[unc_column_name].min(), queried_df[unc_column_name].max() + ) sm = plt.cm.ScalarMappable(cmap="coolwarm", norm=norm) sm.set_array([]) @@ -296,10 +302,10 @@ def convert_df(df): else: # just print the prediction - preds = df[pred_column_name].values[0] - smiles = df["SMILES"].values[0] + preds = queried_df[pred_column_name].values[0] + smiles = queried_df["SMILES"].values[0] if err: - err = df[unc_column_name].values[0] + err = queried_df[unc_column_name].values[0] errstr = f"± {err:.2f}" else: errstr = "" @@ -309,7 +315,7 @@ def convert_df(df): ) # allow the user to download the predictions -csv = convert_df(df) +csv = convert_df(queried_df) st.download_button( label="Download data as CSV", data=csv, From f0817fbdf6c2722210519cbb0fa44b1d879a58b6 Mon Sep 17 00:00:00 2001 From: Hugo MacDermott-Opeskin Date: Thu, 26 Sep 2024 13:28:24 +1000 Subject: [PATCH 4/4] fix nonlocal return --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index bc0ff75..25a4aa7 100644 --- a/app.py +++ b/app.py @@ -59,7 +59,7 @@ def sdf_str_to_rdkit_mol(sdf): @st.cache_data def convert_df(df): # IMPORTANT: Cache the conversion to prevent computation on every rerun - return queried_df.to_csv().encode("utf-8") + return df.to_csv().encode("utf-8") # Set the title of the Streamlit app