
Commit b1caff4

Merge pull request #5 from asapdiscovery/remove_OE_secret_shenannigans
Remove oe secret shenannigans and add basic CI
2 parents 3583f87 + 1a66d6f commit b1caff4

File tree

5 files changed: +167 additions, −15 deletions


.github/workflows/CI.yaml

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+name: CI
+
+on:
+  # GitHub has started calling new repo's first branch "main" https://github.com/github/renaming
+  # The cookiecutter uses the "--initial-branch" flag when it runs git-init
+  push:
+    branches:
+      - "main"
+  pull_request:
+    branches:
+      - "main"
+  schedule:
+    # Weekly tests run on main by default:
+    #   Scheduled workflows run on the latest commit on the default or base branch.
+    #   (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule)
+    - cron: "0 0 * * 0"
+
+defaults:
+  run:
+    shell: bash -l {0}
+
+
+concurrency:
+  group: "${{ github.workflow }}-${{ github.ref }}"
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Test on ${{ matrix.os }}, Python ${{ matrix.python-version }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [macOS-latest, ubuntu-latest]
+        python-version: ["3.10"]
+      fail-fast: false
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Additional info about the build
+        shell: bash
+        run: |
+          uname -a
+          df -h
+          ulimit -a
+
+      # More info on options: https://github.com/marketplace/actions/provision-with-micromamba
+      # More info on options: https://github.com/mamba-org/provision-with-micromamba
+      - name: Setup Conda Environment
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          # default - will pull down 2.0 which we don't want!
+          # micromamba-version: latest
+          # pin to latest 1.x release
+          micromamba-version: '1.5.10-0'
+          environment-file: environment.yml
+          environment-name: asap-ml-streamlit
+          cache-environment: true
+          cache-downloads: true
+          cache-environment-key: environment-${{ steps.date.outputs.date }}
+          cache-downloads-key: downloads-${{ steps.date.outputs.date }}
+          create-args: >-
+            python==${{ matrix.python-version }}
+
+
+      # FIXME: Make the tests run
+      # - name: Run tests
+      #   run: |
+      #     pytest -vvv --color=yes test_app.py
+
+

.gitignore

Lines changed: 4 additions & 1 deletion
@@ -5,4 +5,7 @@
 oe_license.txt
 
 # .env file
-.env
+.env
+
+# __pycache__ folder
+__pycache__/

app.py

Lines changed: 17 additions & 13 deletions
@@ -67,6 +67,7 @@ def convert_df(df):
 input = st.selectbox(
     "How would you like to enter your input?",
     ["Upload a CSV file", "Draw a molecule", "Enter SMILES", "Upload an SDF file"],
+    key="input",
 )
 
 multismiles = False
@@ -82,7 +83,7 @@ def convert_df(df):
     smiles_column_name = "SMILES"
     smiles_column = queried_df[smiles_column_name]
 elif input == "Enter SMILES":
-    smiles = st.text_input("Enter a SMILES string")
+    smiles = st.text_input("Enter a SMILES string", key="smiles_user_input")
     if _is_valid_smiles(smiles):
         st.success("Valid SMILES string", icon="✅")
     else:
@@ -95,7 +96,7 @@ def convert_df(df):
 elif input == "Upload a CSV file":
     # Create a file uploader for CSV files
     uploaded_file = st.file_uploader(
-        "Choose a CSV file to upload your predictions to", type="csv"
+        "Choose a CSV file to upload your predictions to", type="csv", key="csv_file"
     )
 
     # If a file is uploaded, parse it into a DataFrame
@@ -104,7 +105,7 @@ def convert_df(df):
     else:
         st.stop()
     # Select a column from the DataFrame
-    smiles_column_name = st.selectbox("Select a SMILES column", queried_df.columns)
+    smiles_column_name = st.selectbox("Select a SMILES column", queried_df.columns, key="df_smiles_column")
     multismiles = True
     smiles_column = queried_df[smiles_column_name]
 
@@ -128,15 +129,18 @@ def convert_df(df):
     # read with rdkit
     if uploaded_file is not None:
         # To convert to a string based IO:
-        stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
-        # To read file as string:
-        string_data = stringio.read()
-        mols = sdf_str_to_rdkit_mol(string_data)
-        smiles = [Chem.MolToSmiles(m) for m in mols]
-        queried_df = pd.DataFrame(smiles, columns=["SMILES"])
-        # st.error("Error reading the SDF file, please check the input", icon="🚨")
-        # st.stop()
+        try:
+            stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+            # To read file as string:
+            string_data = stringio.read()
+            mols = sdf_str_to_rdkit_mol(string_data)
+            smiles = [Chem.MolToSmiles(m) for m in mols]
+            queried_df = pd.DataFrame(smiles, columns=["SMILES"])
+        except:
+            st.error("Error reading the SDF file, please check the input", icon="🚨")
+            st.stop()
     else:
+        st.error("No file uploaded", icon="🚨")
         st.stop()
 
 st.success(
@@ -154,11 +158,11 @@ def convert_df(df):
 # filter out None values
 targets = [t for t in targets if t is not None]
 # Select a target value from the preset list
-target_value = st.selectbox("Select a biological target ", targets)
+target_value = st.selectbox("Select a biological target ", targets, key="target")
 # endpoints
 endpoints = ASAPMLModelRegistry.get_endpoints()
 # Select a target value from the preset list
-endpoint_value = st.selectbox("Select a property ", endpoints)
+endpoint_value = st.selectbox("Select a property ", endpoints, key="endpoint")
 
 if not ASAPMLModelRegistry.endpoint_has_target(endpoint_value):
     _target = None
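The SDF hunk above wraps the call to sdf_str_to_rdkit_mol(string_data) in a try/except so a malformed upload produces a user-facing error instead of a traceback. The helper itself is not part of this diff; the sketch below is only an assumption of how such a function might look using RDKit's SDMolSupplier (the function body, signature details, and error behaviour are illustrative, not the app's actual implementation):

    from rdkit import Chem

    def sdf_str_to_rdkit_mol(sdf_str: str) -> list[Chem.Mol]:
        """Parse SDF text into a list of RDKit molecules (hypothetical sketch)."""
        supplier = Chem.SDMolSupplier()
        supplier.SetData(sdf_str)
        # Drop records RDKit could not parse.
        mols = [mol for mol in supplier if mol is not None]
        if not mols:
            # Raising here lets the caller's try/except show the st.error message.
            raise ValueError("No valid molecules found in the SDF input")
        return mols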

environment.yml

Lines changed: 4 additions & 1 deletion
@@ -1,3 +1,5 @@
+name: asap-ml-streamlit
+
 channels:
   - conda-forge
   - openeye
@@ -14,7 +16,6 @@ dependencies:
   - biopython
   - schedule
   - openeye-toolkits
-  - asapdiscovery
 
   # ml
   - pytorch
@@ -39,6 +40,8 @@ dependencies:
   - boto3
   - pandas
 
+  - pytest
+
 
 
   # Pip-only installs

test_app.py

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+import pytest
+import pandas as pd
+from streamlit.testing.v1 import AppTest
+
+class STTester:
+    timeout = 500
+
+
+@pytest.fixture()
+def app_path():
+    # return the path to the app under test
+    return "./app.py"
+
+@pytest.fixture()
+def smiles_dataframe_data():
+    # synthetic data with multiple columns and SMILES
+    data = {
+        "mySmiles": ["CC", "CCC", "CCCC"],
+        "blah": ["1", "2", "3"],
+        "bleh": ["a", "b", "c"],
+    }
+    return pd.DataFrame(data)
+
+@pytest.fixture()
+def smiles_dataframe_data_csv(tmp_path, smiles_dataframe_data):
+    # synthetic data with multiple columns and SMILES, written to CSV
+    csv_path = tmp_path / "smiles_data.csv"
+    smiles_dataframe_data.to_csv(csv_path, index=False)
+    return csv_path
+
+
+
+class TestSMILES(STTester):
+
+    @pytest.mark.parametrize("target", ["SARS-CoV-2-Mpro", "MERS-CoV-Mpro"])
+    @pytest.mark.parametrize("endpoint", ["pIC50", "LogD"])
+    def test_smiles(self, app_path, endpoint, target, tmp_path):
+        at = AppTest.from_file(app_path)
+        at.run(timeout=self.timeout)
+        at.selectbox(key="input").select("Enter SMILES").run(timeout=self.timeout)
+        at.text_input(key="smiles_user_input").input("CC").run(timeout=self.timeout)
+        at.selectbox(key="target").select(target).run(timeout=self.timeout)
+        at.selectbox(key="endpoint").select(endpoint).run(timeout=self.timeout)
+        val = at.markdown[-1].value  # last markdown
+        assert "CC" in val
+        if not endpoint == "LogD":
+            assert target in val
+        else:
+            assert "global" in val
+        assert endpoint in val
+        assert at.success
+
+
+
+
+class TestDataframe(STTester):
+
+    @pytest.mark.xfail(reason="No ability to mock file upload, see https://github.com/streamlit/streamlit/issues/8438")
+    @pytest.mark.parametrize("target", ["SARS-CoV-2-Mpro", "MERS-CoV-Mpro"])
+    @pytest.mark.parametrize("endpoint", ["pIC50", "LogD"])
+    def test_dataframe(self, app_path, smiles_dataframe_data_csv, target, endpoint, tmp_path):
+        at = AppTest.from_file(app_path)
+        at.run(timeout=self.timeout)
+        at.selectbox(key="input").select("Upload a CSV file").run(timeout=self.timeout)
+        # can't be bothered to mock testing internals to get this to work;
+        # you could also possibly use selenium, but that seems like a lot of work
+        at.file_uploader(key="csv_file").upload(smiles_dataframe_data_csv).run(timeout=self.timeout)
+        at.selectbox(key="df_smiles_column").select("mySmiles").run(timeout=self.timeout)
+        at.selectbox(key="target").select("SARS-CoV-2-Mpro").run(timeout=self.timeout)
+        at.selectbox(key="endpoint").select("pIC50").run(timeout=self.timeout)
+        assert at.success
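The key= arguments added to the widgets in app.py are what let these tests address elements directly through streamlit.testing.v1.AppTest. As a rough, self-contained illustration of that pattern, here is a toy script and test invented purely for this example (the inline app and names are not part of the repository):

    from streamlit.testing.v1 import AppTest

    # A throwaway Streamlit script defined inline; only the widget keys matter here.
    toy_app = """
    import streamlit as st
    name = st.text_input("Name", key="name_input")
    greeting = st.selectbox("Greeting", ["Hello", "Hi"], key="greeting")
    st.markdown(f"{greeting}, {name}!")
    """

    def test_toy_app():
        at = AppTest.from_string(toy_app)
        at.run()
        # Widgets are looked up by the key passed to the Streamlit call.
        at.text_input(key="name_input").input("ASAP").run()
        at.selectbox(key="greeting").select("Hi").run()
        assert "Hi, ASAP!" in at.markdown[0].value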
