ipd committed on
Commit fb1e7c8
1 Parent(s): b2f420b

path fixed

Files changed (4)
  1. README.md +4 -0
  2. app.py +9 -4
  3. models/fm4m.py +9 -222
  4. representation/esol_smi-ted.pkl +2 -2
README.md CHANGED
@@ -8,6 +8,10 @@ sdk_version: 5.4.0
  app_file: app.py
  pinned: false
  license: apache-2.0
+ models:
+ - ibm/materials.smi-ted
+ - ibm/materials.selfies-ted
+ - ibm/materials.mhg-ged
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -35,11 +35,16 @@ theme = gr.themes.Default().set(
      text_color="#FFFFFF", # set the text color to white
  )
  """
-
+ """
  import sys
  sys.path.append("models")
  sys.path.append("../models")
- sys.path.append("../")
+ sys.path.append("../")"""
+
+
+ # Get the current file's directory
+ base_dir = os.path.dirname(__file__)
+ print("Base Dir : ", base_dir)

  import models.fm4m as fm4m

@@ -388,8 +393,8 @@ def display_plot(plot_type):

  # Predefined dataset paths (these should be adjusted to your file paths)
  predefined_datasets = {
-     "Bace": f"data/bace/train.csv, data/bace/test.csv, smiles, Class",
-     "ESOL": f"data/esol/train.csv, data/esol/test.csv, smiles, prop",
+     "Bace": f"./data/bace/train.csv, ./data/bace/test.csv, smiles, Class",
+     "ESOL": f"./data/esol/train.csv, ./data/esol/test.csv, smiles, prop",
  }


 
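Note on the app.py change: the commit quotes out the old sys.path tweaks and switches the predefined dataset paths to "./"-prefixed strings, while base_dir is currently only printed. If the paths should also be independent of the working directory, a minimal sketch could look like the following; the resolve_path helper is hypothetical and not part of this commit.

import os

# Directory that contains app.py, independent of the process's working directory.
base_dir = os.path.dirname(os.path.abspath(__file__))

def resolve_path(relative_path):
    # Hypothetical helper (not in this commit): anchor a repo-relative path
    # such as "data/bace/train.csv" to the app's own directory.
    return os.path.join(base_dir, relative_path)

# Example with the files referenced in predefined_datasets:
train_csv = resolve_path("data/bace/train.csv")
test_csv = resolve_path("data/bace/test.csv")
print(train_csv, test_csv)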
models/fm4m.py CHANGED
@@ -25,12 +25,9 @@ from sklearn.preprocessing import MinMaxScaler
  import torch
  from transformers import AutoTokenizer, AutoModel

- import sys
- sys.path.append("models/")
-
- from models.selfies_model.load import SELFIES as bart
- from models.mhg_model import load as mhg
- from models.smi_ted.smi_ted_light.load import load_smi_ted
+ from .selfies_model.load import SELFIES as bart
+ from .mhg_model import load as mhg
+ from .smi_ted.smi_ted_light.load import load_smi_ted

  datasets = {}
  models = {}
@@ -181,175 +178,6 @@ def update_downstream_model_list(list_model):

  avail_models_data()

- def list_models():
-     #print(*list(models.keys()),sep='\n')
-     data = avail_models(raw=True)
-     # Convert data to a pandas DataFrame
-     df = pd.DataFrame(data)
-
-     # Add a column for displaying row numbers starting from 1
-     df.index += 1
-
-     # Create dropdown widget for sorting
-     sort_dropdown = widgets.Dropdown(
-         options=['Name', 'Timestamp'],
-         value='Name',
-         description='Sort by:',
-         disabled=False,
-     )
-
-     # Output widget to display the table
-     output = widgets.Output()
-
-     # Define function to update display based on sorting
-     def update_display(change):
-         with output:
-             output.clear_output(wait=True)
-             sorted_df = df.sort_values(by=sort_dropdown.value)
-             display(sorted_df.style.set_properties(**{
-                 'text-align': 'left', 'border': '1px solid #ddd',
-             }))
-
-     # Attach the update_display function to the dropdown widget
-     sort_dropdown.observe(update_display, names='value')
-
-     # Display the dropdown and the table initially
-     display(sort_dropdown, output)
-     update_display(None)
-
- def list_downstream_models():
-     #print(*list(models.keys()),sep='\n')
-     data = avail_downstream_models()
-     # Convert data to a pandas DataFrame
-     df = pd.DataFrame(data)
-
-     # Add a column for displaying row numbers starting from 1
-     df.index += 1
-
-     # Create dropdown widget for sorting
-     sort_dropdown = widgets.Dropdown(
-         options=['Name', 'Timestamp'],
-         value='Timestamp',
-         description='Sort by:',
-         disabled=False,
-     )
-
-     # Output widget to display the table
-     output = widgets.Output()
-
-     # Define function to update display based on sorting
-     def update_display(change):
-         with output:
-             output.clear_output(wait=True)
-             sorted_df = df.sort_values(by=sort_dropdown.value)
-             display(sorted_df.style.set_properties(**{
-                 'text-align': 'left', 'border': '1px solid #ddd',
-             }))
-
-     # Attach the update_display function to the dropdown widget
-     sort_dropdown.observe(update_display, names='value')
-
-     # Display the dropdown and the table initially
-     display(sort_dropdown, output)
-     update_display(None)
-
- def list_data():
-
-     #print(*list(datasets.keys()),sep='\n')
-     data = avail_datasets()
-     # Convert data to a pandas DataFrame
-     df = pd.DataFrame(data)
-
-     # Add a column for displaying row numbers starting from 1
-     df.index += 1
-
-     # Create dropdown widget for sorting
-     sort_dropdown = widgets.Dropdown(
-         options=['Dataset', 'Input', 'Output', 'Path', 'Timestamp'],
-         value='Input',
-         description='Sort by:',
-         disabled=False,
-     )
-
-     # Output widget to display the table
-     output = widgets.Output()
-
-     # Define function to update display based on sorting
-     def update_display(change):
-         with output:
-             output.clear_output(wait=True)
-             sorted_df = df.sort_values(by=sort_dropdown.value)
-             display(sorted_df.style.set_properties(**{
-                 'text-align': 'left', 'border': '1px solid #ddd',
-             }))
-
-     # Attach the update_display function to the dropdown widget
-     sort_dropdown.observe(update_display, names='value')
-
-     # Display the dropdown and the table initially
-     display(sort_dropdown, output)
-     update_display(None)
-
- def vizualize(roc_auc,fpr, tpr, features, labels):
-     #def vizualize(features, labels):
-
-     reducer = umap.UMAP(metric="jaccard", n_neighbors=20, n_components=2, low_memory=True, min_dist=0.001, verbose=False)
-
-     features_umap = reducer.fit_transform(features)
-     x = labels.values
-     index_0 = [index for index in range(len(x)) if x[index] == 0]
-     index_1 = [index for index in range(len(x)) if x[index] == 1]
-
-     class_0 = features_umap[index_0]
-     class_1 = features_umap[index_1]
-
-
-     # Function to create ROC AUC plot
-     def plot_roc_auc():
-         plt.figure(figsize=(8, 6))
-         plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.4f})')
-         plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-         plt.xlim([0.0, 1.0])
-         plt.ylim([0.0, 1.05])
-         plt.xlabel('False Positive Rate')
-         plt.ylabel('True Positive Rate')
-         plt.title('Receiver Operating Characteristic')
-         plt.legend(loc='lower right')
-         plt.show()
-
-     # Function to create scatter plot of the dataset distribution
-     def plot_distribution():
-         plt.figure(figsize=(8, 6))
-         #plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, edgecolors='k')
-         plt.scatter(class_1[:, 0], class_1[:, 1], c='red', label='Class 1')
-         plt.scatter(class_0[:, 0], class_0[:, 1], c='blue', label='Class 0')
-
-         plt.xlabel('Feature 1')
-         plt.ylabel('Feature 2')
-         plt.title('Dataset Distribution')
-         plt.show()
-
-
-
-     # Create tabs using ipywidgets
-     tab_contents = ['ROC AUC', 'Distribution']
-     children = [widgets.Output(), widgets.Output()]
-
-     tab = widgets.Tab()
-     tab.children = children
-     for i in range(len(tab_contents)):
-         tab.set_title(i, tab_contents[i])
-
-     # Display plots in their respective tabs
-     with children[0]:
-         plot_roc_auc()
-
-     with children[1]:
-         plot_distribution()
-
-     # Display the tab widget
-     display(tab)
-
  def get_representation(train_data,test_data,model_type, return_tensor=True):
      alias = {"MHG-GED": "mhg", "SELFIES-TED": "bart", "MolFormer": "mol-xl", "Molformer": "mol-xl", "SMI-TED": "smi-ted"}
      if model_type in alias.keys():
@@ -434,7 +262,7 @@ def single_modal(model,dataset, downstream_model,params):

      if dataset in list(df["Dataset"].values):
          task = dataset
-         with open(f"representation/{task}_{model_type}.pkl", "rb") as f1:
+         with open(f"./representation/{task}_{model_type}.pkl", "rb") as f1:
              x_batch, y_batch, x_batch_test, y_batch_test = pickle.load(f1)
          print(f" Representation loaded successfully")
      else:
@@ -472,7 +300,7 @@ def single_modal(model,dataset, downstream_model,params):
      print(f"ROC-AUC Score: {roc_auc:.4f}")

      try:
-         with open(f"plot_emb/{task}_{model_type}.pkl", "rb") as f1:
+         with open(f"./plot_emb/{task}_{model_type}.pkl", "rb") as f1:
              class_0,class_1 = pickle.load(f1)
      except:
          print("Generating latent plots")
@@ -505,7 +333,7 @@ def single_modal(model,dataset, downstream_model,params):
      print(f"ROC-AUC Score: {roc_auc:.4f}")

      try:
-         with open(f"plot_emb/{task}_{model_type}.pkl", "rb") as f1:
+         with open(f"./plot_emb/{task}_{model_type}.pkl", "rb") as f1:
              class_0,class_1 = pickle.load(f1)
      except:
          print("Generating latent plots")
@@ -673,7 +501,7 @@ def multi_modal(model_list,dataset, downstream_model,params):

          if i == 0:
              if predefined:
-                 with open(f"representation/{task}_{model_type}.pkl", "rb") as f1:
+                 with open(f"./representation/{task}_{model_type}.pkl", "rb") as f1:
                      x_batch, y_batch, x_batch_test, y_batch_test = pickle.load(f1)
                  print(f" Loaded representation/{task}_{model_type}.pkl")
              else:
@@ -683,7 +511,7 @@ def multi_modal(model_list,dataset, downstream_model,params):

          else:
              if predefined:
-                 with open(f"representation/{task}_{model_type}.pkl", "rb") as f1:
+                 with open(f"./representation/{task}_{model_type}.pkl", "rb") as f1:
                      x_batch_1, y_batch_1, x_batch_test_1, y_batch_test_1 = pickle.load(f1)
                  print(f" Loaded representation/{task}_{model_type}.pkl")
              else:
@@ -708,7 +536,7 @@ def multi_modal(model_list,dataset, downstream_model,params):

      print(f"Representations loaded successfully")
      try:
-         with open(f"plot_emb/{task}_multi.pkl", "rb") as f1:
+         with open(f"./plot_emb/{task}_multi.pkl", "rb") as f1:
              class_0, class_1 = pickle.load(f1)
      except:
          print("Generating latent plots")
@@ -830,47 +658,6 @@ def multi_modal(model_list,dataset, downstream_model,params):



- def finetune_optuna(x_batch,y_batch, x_batch_test, y_test ):
-     print(f" Finetuning with Optuna and calculating ROC AUC Score ...")
-     X_train = x_batch.values
-     y_train = y_batch.values
-     X_test = x_batch_test.values
-     y_test = y_test.values
-     def objective(trial):
-         # Define parameters to be optimized
-         params = {
-             # 'objective': 'binary:logistic',
-             'eval_metric': 'auc',
-             'verbosity': 0,
-             'n_estimators': trial.suggest_int('n_estimators', 1000, 10000),
-             # 'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart']),
-             # 'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
-             'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
-             'max_depth': trial.suggest_int('max_depth', 1, 12),
-             # 'eta': trial.suggest_loguniform('eta', 1e-8, 1.0),
-             # 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
-             # 'grow_policy': trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide']),
-             # "subsample": trial.suggest_float("subsample", 0.05, 1.0),
-             # "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
-         }
-
-         # Train XGBoost model
-         dtrain = xgb.DMatrix(X_train, label=y_train)
-         dtest = xgb.DMatrix(X_test, label=y_test)
-
-         model = xgb.train(params, dtrain)
-
-         # Predict probabilities
-         y_pred = model.predict(dtest)
-
-         # Calculate ROC AUC score
-         roc_auc = roc_auc_score(y_test, y_pred)
-         print("ROC_AUC : ", roc_auc)
-
-         return roc_auc
-
-
-



 
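Note on the models/fm4m.py change: dropping the sys.path manipulation in favor of package-relative imports assumes fm4m.py is always loaded as a submodule of the models package, which matches how app.py loads it (import models.fm4m as fm4m). A minimal sketch of that assumption; the directory listing is illustrative, and only the names that appear in the diff are taken from the repo.

# Layout assumed by the relative imports in models/fm4m.py:
#
#   app.py
#   models/
#       __init__.py                # typically present so "models" is a regular package
#       fm4m.py                    # uses: from .mhg_model import load as mhg
#       mhg_model/ ...
#       selfies_model/ ...
#       smi_ted/smi_ted_light/ ...
#
# Importing through the package keeps the relative imports valid:
import models.fm4m as fm4m         # here fm4m.__package__ == "models"

# Running the file directly would break them, e.g.:
#   $ python models/fm4m.py
#   ImportError: attempted relative import with no known parent package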
representation/esol_smi-ted.pkl CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c063dcdc14ded2e6381bbf36b073b30303c89a1401b8ff4402e45dca00f3e118
- size 2964482
+ oid sha256:52cbf2c9afa3a06ed068ba7583df229ec9a1aa823b22baecac97fa891475f85a
+ size 2964232