Spaces:

legend1234
/

b3clf_hf

Sleeping

App Files Files Community

legend1234 committed on Oct 8, 2023

Commit

5bd9791

•

1 Parent(s): ed190ed

Reformat the layout

Browse files

Files changed (3) hide show

app.py +83 -13
test_input_sdf.sdf → sample_input.sdf +0 -0
test_SMILES.csv → sample_input_smiles.csv +0 -0

app.py CHANGED Viewed

@@ -5,12 +5,17 @@ from io import StringIO
 import joblib
 import numpy as np
 import pandas as pd
 # page set up
 import streamlit as st
 from b3clf.descriptor_padel import compute_descriptors
 from b3clf.geometry_opt import geometry_optimize
-from b3clf.utils import (get_descriptors, predict_permeability,
-                         scale_descriptors, select_descriptors)
 from streamlit_ketcher import st_ketcher
 st.set_page_config(
@@ -54,10 +59,24 @@ scaler = joblib.load("pre_trained/b3clf_scaler.joblib")
 keep_features = "no"
 keep_sdf = "no"
 def generate_predictions(
-    uploaded_file: st.file_uploader,
     sep: str = "\s+|\t+",
     clf: str = "xgb",
     sampling: str = "classic_ADASYN",
@@ -68,14 +87,14 @@ def generate_predictions(
     """
     # mol_tag = os.path.splitext(uploaded_file.name)[0]
     # uploaded_file = uploaded_file.read().decode("utf-8")
-    mol_tag = os.path.basename(uploaded_file).split(".")[0]
     internal_sdf = f"{mol_tag}_optimized_3d.sdf"
     # Geometry optimization
     # Input:
     # * Either an SDF file with molecular geometries or a text file with SMILES strings
-    geometry_optimize(input_fname=uploaded_file, output_sdf=internal_sdf, sep=sep)
     df_features = compute_descriptors(
         sdf_file=internal_sdf,
@@ -132,12 +151,55 @@ info_column, upload_column = st.columns(2)
 with upload_column:
     st.subheader("Molecule Input")
-    file = st.file_uploader(
-        label="Upload a CSV, SDF or TXT file",
-        type=["csv", "sdf", "txt"],
-        help="Input molecule file and only text files are supported.",
-        # accept_multiple_files=False,
-    )
 # st.write("The content of the file will be displayed below once uploaded.")
 # if file:
 # if "csv" in file.name or "txt" in file.name:
@@ -156,12 +218,13 @@ with info_column:
 feature_column, prediction_column = st.columns(2)
 with feature_column:
     st.subheader("Features")
     placeholder_features = st.empty()
     # placeholder_features = pd.DataFrame(index=[1, 2, 3, 4],
     #                                     columns=["ID", "nAcid", "ALogP", "Alogp2",
     #                                              "AMR", "naAromAtom", "nH", "nN"])
     # st.dataframe(placeholder_features)
-    placeholder_features.text("molecular features")
 with prediction_column:
     st.subheader("Predictions")
@@ -177,7 +240,14 @@ if file:
     # Save the uploaded file to the temporary file path
     with open(temp_file_path, "wb") as temp_file:
         temp_file.write(file.read())
-    X_features, results = generate_predictions(temp_file_path)
     # feature table
     with feature_column:

 import joblib
 import numpy as np
 import pandas as pd
 # page set up
 import streamlit as st
 from b3clf.descriptor_padel import compute_descriptors
 from b3clf.geometry_opt import geometry_optimize
+from b3clf.utils import (
+    get_descriptors,
+    predict_permeability,
+    scale_descriptors,
+    select_descriptors,
+)
 from streamlit_ketcher import st_ketcher
 st.set_page_config(
 keep_features = "no"
 keep_sdf = "no"
+classifiers_dict = {
+    "decision trees": "dtree",
+    "kNN": "knn",
+    "logsistical regression": "logreg",
+    "XGBoost": "xgb",
+}
+resample_methods_dict = {
+    "random undersampling": "classic_RandUndersampling",
+    "SMOTE": "classic_SMOTE",
+    "Borderline SMOTE": "borderline_SMOTE",
+    "k-means SMOTE": "kmeans_SMOTE",
+    "ADASYN": "classic_ADASYN",
+    "no resampling": "common",
+}
 def generate_predictions(
+    input_fname: str,
     sep: str = "\s+|\t+",
     clf: str = "xgb",
     sampling: str = "classic_ADASYN",
     """
     # mol_tag = os.path.splitext(uploaded_file.name)[0]
     # uploaded_file = uploaded_file.read().decode("utf-8")
+    mol_tag = os.path.basename(input_fname).split(".")[0]
     internal_sdf = f"{mol_tag}_optimized_3d.sdf"
     # Geometry optimization
     # Input:
     # * Either an SDF file with molecular geometries or a text file with SMILES strings
+    geometry_optimize(input_fname=input_fname, output_sdf=internal_sdf, sep=sep)
     df_features = compute_descriptors(
         sdf_file=internal_sdf,
 with upload_column:
     st.subheader("Molecule Input")
+    with st.container():
+        # uneven columns
+        # st.columns((2, 1, 1, 1))
+        # two subcolumns for sample input files
+        sample_sdf_column, classifier_col = st.columns(2)
+        with sample_sdf_column:
+            # download sample sdf
+            with open("sample_input.sdf", "r") as file_sdf:
+                btn = st.download_button(
+                    label="Download SDF sample file",
+                    data=file_sdf,
+                    file_name="sample_input.sdf",
+                )
+        with classifier_col:
+            classifier = st.selectbox(
+                label="Classification algorithm:",
+                options=("XGBoost", "kNN", "decision trees", "logsistical regression"),
+            )
+        sample_smiles_column, resampler_col = st.columns(2)
+        with sample_smiles_column:
+            # download sample smiles
+            with open("sample_input_smiles.csv", "r") as file_smi:
+                btn = st.download_button(
+                    label="Download SMILES sample file",
+                    data=file_smi,
+                    file_name="sample_input_smiles.csv",
+                )
+        with resampler_col:
+            resampler = st.selectbox(
+                label="Resampling method:",
+                options=(
+                    "ADASYN",
+                    "random undersampling",
+                    "Borderline SMOTE",
+                    "k-means SMOTE",
+                    "SMOTE",
+                    "no resampling",
+                ),
+            )
+        # horizontal line
+        st.divider()
+        file = st.file_uploader(
+            label="Upload a CSV, SDF or TXT file",
+            type=["csv", "sdf", "txt"],
+            help="Input molecule file and only text files are supported.",
+            # accept_multiple_files=False,
+        )
 # st.write("The content of the file will be displayed below once uploaded.")
 # if file:
 # if "csv" in file.name or "txt" in file.name:
 feature_column, prediction_column = st.columns(2)
 with feature_column:
     st.subheader("Features")
     placeholder_features = st.empty()
     # placeholder_features = pd.DataFrame(index=[1, 2, 3, 4],
     #                                     columns=["ID", "nAcid", "ALogP", "Alogp2",
     #                                              "AMR", "naAromAtom", "nH", "nN"])
     # st.dataframe(placeholder_features)
+    # placeholder_features.text("molecular features")
 with prediction_column:
     st.subheader("Predictions")
     # Save the uploaded file to the temporary file path
     with open(temp_file_path, "wb") as temp_file:
         temp_file.write(file.read())
+    # X_features, results = generate_predictions(temp_file_path)
+    X_features, results = generate_predictions(
+        input_fname=temp_file_path,
+        sep="\s+|\t+",
+        clf=classifiers_dict[classifier],
+        sampling=resample_methods_dict[resampler],
+        time_per_mol=120,
+    )
     # feature table
     with feature_column:

test_input_sdf.sdf → sample_input.sdf RENAMED Viewed

File without changes

test_SMILES.csv → sample_input_smiles.csv RENAMED Viewed

File without changes