File size: 2,048 Bytes
d151dad 5d3f533 d151dad 6bfef54 d151dad 44de84c e4cf83d d151dad e4cf83d d151dad ab3118d d151dad 3b67f98 6bfef54 25a4dd3 92cb149 ab3118d 6bfef54 d151dad ab3118d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import pandas_profiling as pp
from huggingface_hub.hf_api import create_repo, upload_file
from huggingface_hub.repository import Repository
import gradio as gr
import pandas as pd
import subprocess
import os
import tempfile
# --- Static UI text shown by the Gradio interface ---------------------------
# NOTE(review): the emoji in `description` and `title` look mis-encoded in this
# copy of the file; they are kept as-is here. Confirm against a UTF-8 checkout.
description = (
    "This Space will profile a dataset file that you drag and drop and push "
    "the profile report to your Hugging Face account. π \n The value in "
    "dataset name field you'll enter will be used in the namespace of the "
    "Space that will be pushed to your profile, so you can use it to version "
    "the reports too! ππ» Feel free to open a discussion in case you have any "
    "feature requests. Make sure the repository name doesn't exist."
)
title = "Dataset Profiler πͺβ¨"

# --- Input and output components ---------------------------------------------
# Three text inputs and one file input feed `profile_dataset`; the single
# "Status" textbox receives the string it returns.
token = gr.Textbox(label="Your Hugging Face Token")
username = gr.Textbox(label="Your Hugging Face User Name")
dataset_name = gr.Textbox(label="Dataset Name")
dataset = gr.File(label="Dataset")
output_text = gr.Textbox(label="Status")
def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV and publish the report as a static Space.

    The uploaded file is read with ``pd.read_csv``, a pandas-profiling report
    is rendered to ``index.html``, a public static Space named
    ``{username}/{dataset_name}`` is created, and the report plus a
    ``README.md`` metadata card are uploaded to it.

    Args:
        dataset: Uploaded file object from the file input; its ``name``
            attribute is used as the path of the CSV to read. ``None`` when
            nothing was uploaded.
        username: Account name used as the first half of the Space id.
        token: Access token forwarded to ``create_repo`` and ``upload_file``.
        dataset_name: Used for the report title, the README title, and the
            second half of the Space id.

    Returns:
        A status string: either a request to supply the missing input, or a
        message containing the value returned by ``create_repo``.

    Raises:
        Whatever ``pd.read_csv``, the profiler, or the hub calls raise (for
        example when the file is not a readable CSV); these are not caught.
    """
    # Fail softly on incomplete forms instead of crashing on `dataset.name`
    # or sending an obviously invalid repo id to the hub.
    if dataset is None:
        return "Please upload a dataset file before submitting."
    required_fields = (
        ("user name", username),
        ("token", token),
        ("dataset name", dataset_name),
    )
    missing = [
        label
        for label, value in required_fields
        if not value or not str(value).strip()
    ]
    if missing:
        return f"Please fill in the following field(s): {', '.join(missing)}."

    repo_id = f"{username}/{dataset_name}"

    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")
    repo_url = create_repo(
        repo_id,
        repo_type="space",
        token=token,
        space_sdk="static",
        private=False,
    )

    # Space metadata card. The emoji is written as an escape (U+2728,
    # sparkles) so it survives any re-encoding of this source file.
    readme = (
        f"---\ntitle: {dataset_name}\nemoji: \u2728\ncolorFrom: green\n"
        "colorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
    )

    # Build the artifacts in a private scratch directory: writing them into
    # the working directory lets concurrent requests overwrite each other's
    # files and can clobber files that already live there.
    with tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "index.html")
        readme_path = os.path.join(workdir, "README.md")

        profile.to_file(report_path)
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

        # Explicit UTF-8: the card contains a non-ASCII character.
        with open(readme_path, "w", encoding="utf-8") as readme_file:
            readme_file.write(readme)
        upload_file(
            path_or_fileobj=readme_path,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    return f"Your dataset report will be ready at {repo_url}"
# Wire `profile_dataset` to the components defined above and start the app.
# Input order must match the function's parameter order:
# (dataset, username, token, dataset_name).
demo = gr.Interface(
    profile_dataset,
    title=title,
    description=description,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    enable_queue=True,
)
demo.launch(debug=True)