Spaces:
Sleeping
Sleeping
legend1234
committed on
Commit
•
5bd9791
1
Parent(s):
ed190ed
Reformat the layout
Browse files- app.py +83 -13
- test_input_sdf.sdf → sample_input.sdf +0 -0
- test_SMILES.csv → sample_input_smiles.csv +0 -0
app.py
CHANGED
@@ -5,12 +5,17 @@ from io import StringIO
|
|
5 |
import joblib
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
|
|
8 |
# page set up
|
9 |
import streamlit as st
|
10 |
from b3clf.descriptor_padel import compute_descriptors
|
11 |
from b3clf.geometry_opt import geometry_optimize
|
12 |
-
from b3clf.utils import (
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
from streamlit_ketcher import st_ketcher
|
15 |
|
16 |
st.set_page_config(
|
@@ -54,10 +59,24 @@ scaler = joblib.load("pre_trained/b3clf_scaler.joblib")
|
|
54 |
|
55 |
keep_features = "no"
|
56 |
keep_sdf = "no"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
|
59 |
def generate_predictions(
|
60 |
-
|
61 |
sep: str = "\s+|\t+",
|
62 |
clf: str = "xgb",
|
63 |
sampling: str = "classic_ADASYN",
|
@@ -68,14 +87,14 @@ def generate_predictions(
|
|
68 |
"""
|
69 |
# mol_tag = os.path.splitext(uploaded_file.name)[0]
|
70 |
# uploaded_file = uploaded_file.read().decode("utf-8")
|
71 |
-
mol_tag = os.path.basename(
|
72 |
internal_sdf = f"{mol_tag}_optimized_3d.sdf"
|
73 |
|
74 |
# Geometry optimization
|
75 |
# Input:
|
76 |
# * Either an SDF file with molecular geometries or a text file with SMILES strings
|
77 |
|
78 |
-
geometry_optimize(input_fname=
|
79 |
|
80 |
df_features = compute_descriptors(
|
81 |
sdf_file=internal_sdf,
|
@@ -132,12 +151,55 @@ info_column, upload_column = st.columns(2)
|
|
132 |
|
133 |
with upload_column:
|
134 |
st.subheader("Molecule Input")
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
# st.write("The content of the file will be displayed below once uploaded.")
|
142 |
# if file:
|
143 |
# if "csv" in file.name or "txt" in file.name:
|
@@ -156,12 +218,13 @@ with info_column:
|
|
156 |
feature_column, prediction_column = st.columns(2)
|
157 |
with feature_column:
|
158 |
st.subheader("Features")
|
|
|
159 |
placeholder_features = st.empty()
|
160 |
# placeholder_features = pd.DataFrame(index=[1, 2, 3, 4],
|
161 |
# columns=["ID", "nAcid", "ALogP", "Alogp2",
|
162 |
# "AMR", "naAromAtom", "nH", "nN"])
|
163 |
# st.dataframe(placeholder_features)
|
164 |
-
placeholder_features.text("molecular features")
|
165 |
|
166 |
with prediction_column:
|
167 |
st.subheader("Predictions")
|
@@ -177,7 +240,14 @@ if file:
|
|
177 |
# Save the uploaded file to the temporary file path
|
178 |
with open(temp_file_path, "wb") as temp_file:
|
179 |
temp_file.write(file.read())
|
180 |
-
X_features, results = generate_predictions(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
# feature table
|
183 |
with feature_column:
|
|
|
5 |
import joblib
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
8 |
+
|
9 |
# page set up
|
10 |
import streamlit as st
|
11 |
from b3clf.descriptor_padel import compute_descriptors
|
12 |
from b3clf.geometry_opt import geometry_optimize
|
13 |
+
from b3clf.utils import (
|
14 |
+
get_descriptors,
|
15 |
+
predict_permeability,
|
16 |
+
scale_descriptors,
|
17 |
+
select_descriptors,
|
18 |
+
)
|
19 |
from streamlit_ketcher import st_ketcher
|
20 |
|
21 |
st.set_page_config(
|
|
|
59 |
|
60 |
keep_features = "no"
|
61 |
keep_sdf = "no"
|
62 |
+
classifiers_dict = {
|
63 |
+
"decision trees": "dtree",
|
64 |
+
"kNN": "knn",
|
65 |
+
"logsistical regression": "logreg",
|
66 |
+
"XGBoost": "xgb",
|
67 |
+
}
|
68 |
+
resample_methods_dict = {
|
69 |
+
"random undersampling": "classic_RandUndersampling",
|
70 |
+
"SMOTE": "classic_SMOTE",
|
71 |
+
"Borderline SMOTE": "borderline_SMOTE",
|
72 |
+
"k-means SMOTE": "kmeans_SMOTE",
|
73 |
+
"ADASYN": "classic_ADASYN",
|
74 |
+
"no resampling": "common",
|
75 |
+
}
|
76 |
|
77 |
|
78 |
def generate_predictions(
|
79 |
+
input_fname: str,
|
80 |
sep: str = "\s+|\t+",
|
81 |
clf: str = "xgb",
|
82 |
sampling: str = "classic_ADASYN",
|
|
|
87 |
"""
|
88 |
# mol_tag = os.path.splitext(uploaded_file.name)[0]
|
89 |
# uploaded_file = uploaded_file.read().decode("utf-8")
|
90 |
+
mol_tag = os.path.basename(input_fname).split(".")[0]
|
91 |
internal_sdf = f"{mol_tag}_optimized_3d.sdf"
|
92 |
|
93 |
# Geometry optimization
|
94 |
# Input:
|
95 |
# * Either an SDF file with molecular geometries or a text file with SMILES strings
|
96 |
|
97 |
+
geometry_optimize(input_fname=input_fname, output_sdf=internal_sdf, sep=sep)
|
98 |
|
99 |
df_features = compute_descriptors(
|
100 |
sdf_file=internal_sdf,
|
|
|
151 |
|
152 |
with upload_column:
|
153 |
st.subheader("Molecule Input")
|
154 |
+
with st.container():
|
155 |
+
# uneven columns
|
156 |
+
# st.columns((2, 1, 1, 1))
|
157 |
+
# two subcolumns for sample input files
|
158 |
+
sample_sdf_column, classifier_col = st.columns(2)
|
159 |
+
with sample_sdf_column:
|
160 |
+
# download sample sdf
|
161 |
+
with open("sample_input.sdf", "r") as file_sdf:
|
162 |
+
btn = st.download_button(
|
163 |
+
label="Download SDF sample file",
|
164 |
+
data=file_sdf,
|
165 |
+
file_name="sample_input.sdf",
|
166 |
+
)
|
167 |
+
with classifier_col:
|
168 |
+
classifier = st.selectbox(
|
169 |
+
label="Classification algorithm:",
|
170 |
+
options=("XGBoost", "kNN", "decision trees", "logsistical regression"),
|
171 |
+
)
|
172 |
+
|
173 |
+
sample_smiles_column, resampler_col = st.columns(2)
|
174 |
+
with sample_smiles_column:
|
175 |
+
# download sample smiles
|
176 |
+
with open("sample_input_smiles.csv", "r") as file_smi:
|
177 |
+
btn = st.download_button(
|
178 |
+
label="Download SMILES sample file",
|
179 |
+
data=file_smi,
|
180 |
+
file_name="sample_input_smiles.csv",
|
181 |
+
)
|
182 |
+
with resampler_col:
|
183 |
+
resampler = st.selectbox(
|
184 |
+
label="Resampling method:",
|
185 |
+
options=(
|
186 |
+
"ADASYN",
|
187 |
+
"random undersampling",
|
188 |
+
"Borderline SMOTE",
|
189 |
+
"k-means SMOTE",
|
190 |
+
"SMOTE",
|
191 |
+
"no resampling",
|
192 |
+
),
|
193 |
+
)
|
194 |
+
|
195 |
+
# horizontal line
|
196 |
+
st.divider()
|
197 |
+
file = st.file_uploader(
|
198 |
+
label="Upload a CSV, SDF or TXT file",
|
199 |
+
type=["csv", "sdf", "txt"],
|
200 |
+
help="Input molecule file and only text files are supported.",
|
201 |
+
# accept_multiple_files=False,
|
202 |
+
)
|
203 |
# st.write("The content of the file will be displayed below once uploaded.")
|
204 |
# if file:
|
205 |
# if "csv" in file.name or "txt" in file.name:
|
|
|
218 |
feature_column, prediction_column = st.columns(2)
|
219 |
with feature_column:
|
220 |
st.subheader("Features")
|
221 |
+
|
222 |
placeholder_features = st.empty()
|
223 |
# placeholder_features = pd.DataFrame(index=[1, 2, 3, 4],
|
224 |
# columns=["ID", "nAcid", "ALogP", "Alogp2",
|
225 |
# "AMR", "naAromAtom", "nH", "nN"])
|
226 |
# st.dataframe(placeholder_features)
|
227 |
+
# placeholder_features.text("molecular features")
|
228 |
|
229 |
with prediction_column:
|
230 |
st.subheader("Predictions")
|
|
|
240 |
# Save the uploaded file to the temporary file path
|
241 |
with open(temp_file_path, "wb") as temp_file:
|
242 |
temp_file.write(file.read())
|
243 |
+
# X_features, results = generate_predictions(temp_file_path)
|
244 |
+
X_features, results = generate_predictions(
|
245 |
+
input_fname=temp_file_path,
|
246 |
+
sep="\s+|\t+",
|
247 |
+
clf=classifiers_dict[classifier],
|
248 |
+
sampling=resample_methods_dict[resampler],
|
249 |
+
time_per_mol=120,
|
250 |
+
)
|
251 |
|
252 |
# feature table
|
253 |
with feature_column:
|
test_input_sdf.sdf → sample_input.sdf
RENAMED
File without changes
|
test_SMILES.csv → sample_input_smiles.csv
RENAMED
File without changes
|