Spaces:

scikit-learn
/

baseline-trainer

Runtime error

App Files Files Community

mervenoyan committed on Jul 5, 2022

Commit

9541eae

•

1 Parent(s): ac70dee

initial commit

Browse files

Files changed (1) hide show

app.py +126 -0

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import gradio as gr
+import pandas as pd
+from huggingface_hub.hf_api import create_repo, upload_folder, upload_file
+from huggingface_hub.repository import Repository
+import subprocess
+import os
+import tempfile
+from uuid import uuid4
+import pickle
+import sweetviz as sv
+import dabl
+import re
+def analyze_datasets(dataset, dataset_name, username, token, column=None, pairwise="off"):
+    df = pd.read_csv(dataset.name)
+    if column is not None:
+        analyze_report = sv.analyze(df, target_feat=column, pairwise_analysis=pairwise)
+    else:
+        analyze_report = sv.analyze(df, pairwise_analysis=pairwise)
+        analyze_report.show_html('index.html', open_browser=False)
+    repo_url = create_repo(f"{username}/{dataset_name}", repo_type = "space", token = token, space_sdk = "static", private=False)
+    upload_file(path_or_fileobj ="./index.html", path_in_repo = "index.html", repo_id =f"{username}/{dataset_name}", repo_type = "space", token=token)
+    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
+    with open("README.md", "w+") as f:
+        f.write(readme)
+    upload_file(path_or_fileobj ="./README.md", path_in_repo = "README.md", repo_id =f"{username}/{dataset_name}", repo_type = "space", token=token)
+    return f"Your dataset report will be ready at {repo_url}"
+from sklearn.utils import estimator_html_repr
+def extract_estimator_config(model):
+    hyperparameter_dict = model.get_params(deep=True)
+    table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
+    for hyperparameter, value in hyperparameter_dict.items():
+        table += f"| {hyperparameter} | {value} |\n"
+    return table
+def train_baseline(dataset, username, dataset_name, token, column):
+    df = pd.read_csv(dataset.name)
+    fc = dabl.SimpleClassifier(random_state=0)
+    df_clean = dabl.clean(df)
+    X = df_clean.drop(column, axis = 1)
+    y = df_clean[column]
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        from contextlib import redirect_stdout
+        with open('logs.txt', 'w') as f:
+            with redirect_stdout(f):
+                print('Logging training')
+                fc.fit(X, y)
+        repo_url = create_repo(repo_id = f"{username}/{dataset_name}", token = token)
+        readme = f"---\nlicense: apache-2.0\nlibrary_name: sklearn\n---\n\n"
+        readme += f"## Baseline Model trained on {dataset_name} to predict {column}\n\n"
+        readme+="Metrics of the best model:\n\n"
+        for elem in str(fc.current_best_).split("\n"):
+            readme+= f"{elem}\n\n"
+        readme+= "\n\nSee model plot below:\n\n"
+        readme+= re.sub(r"\n\s+", "", str(estimator_html_repr(fc.est_)))
+        with open(f"{tmpdirname}/README.md", "w+") as f:
+            f.write(readme)
+        with open(f"{tmpdirname}/clf.pkl", mode="bw") as f:
+            pickle.dump(fc, file=f)
+        upload_folder(repo_id =f"{username}/{dataset_name}", folder_path=tmpdirname, repo_type = "model", token=token, path_in_repo="./")
+    return f"Your model will be ready at {repo_url}"
+with gr.Blocks() as demo:  # top-level UI container; launched on the last line of the file
+    main_title = gr.Markdown("""# Baseline Trainer 🪄🌟✨""")
+    main_desc = gr.Markdown("""This app trains a baseline model for a given dataset and pushes it to your Hugging Face Hub Profile with a model card.""")
+    with gr.Tabs():  # two tabs: model training and dataset analysis
+        with gr.TabItem("Baseline Trainer") as baseline_trainer:  # tab wired to train_baseline
+            with gr.Row():
+                with gr.Column():  # input widgets for the training tab
+                    title = gr.Markdown(""" ## Train a supervised baseline model""")
+                    description = gr.Markdown("This app trains a model and pushes it to your Hugging Face Hub Profile.")
+                    dataset = gr.File(label = "Dataset")
+                    column = gr.Text(label = "Enter target variable:")
+                    dataset_name = gr.Text(label = "Enter dataset name:")  # train_baseline uses this as the repository name
+                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")  # NOTE(review): mentions a dataset report, but this tab trains a model
+                    token = gr.Textbox(label = "Your Hugging Face Token")
+                    username = gr.Textbox(label = "Your Hugging Face User Name")
+                    inference_run = gr.Button("Train")
+                    inference_progress = gr.StatusTracker(cover_container=True)
+                outcome = gr.outputs.Textbox(label = "Progress")  # receives the string returned by train_baseline
+                inference_run.click(  # input order must match train_baseline's parameter order
+                    train_baseline,
+                    inputs=[dataset, username, dataset_name, token, column],
+                    outputs=outcome,
+                    status_tracker=inference_progress,
+                )
+        with gr.TabItem("Analyze") as analyze:  # tab wired to analyze_datasets; rebinds dataset/column/token/... to new components
+            with gr.Row():
+                with gr.Column():  # input widgets for the analysis tab
+                    title = gr.Markdown(""" ## Analyze Dataset """)
+                    description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
+                    dataset = gr.File(label = "Dataset")
+                    column = gr.Text(label = "Compare dataset against a target variable (Optional)")
+                    pairwise = gr.Radio(["off", "on"], label = "Enable pairwise analysis")  # forwarded as analyze_datasets' pairwise argument
+                    token = gr.Textbox(label = "Your Hugging Face Token")
+                    username = gr.Textbox(label = "Your Hugging Face User Name")
+                    dataset_name = gr.Textbox(label = "Dataset Name")  # analyze_datasets uses this as the Space name
+                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
+                    inference_run = gr.Button("Infer")
+                    inference_progress = gr.StatusTracker(cover_container=True)
+                outcome = gr.outputs.Textbox()  # receives the string returned by analyze_datasets
+                inference_run.click(  # input order must match analyze_datasets' parameter order
+                    analyze_datasets,
+                    inputs=[dataset, dataset_name, username, token, column, pairwise],
+                    outputs=outcome,
+                    status_tracker=inference_progress,
+                )
+demo.launch(debug=True)  # runs at import time; there is no __main__ guard