models-explorer / utils.py
osanseviero's picture
Add languages
6c21ae3
raw
history blame
2.43 kB
from datasets import load_dataset
import streamlit as st
from ast import literal_eval
import pandas as pd
# Pipeline tags grouped by modality. Each list holds the pipeline names that
# are treated as belonging to that modality.
nlp_tasks = [
    "text-classification",
    "text-generation",
    "text2text-generation",
    "token-classification",
    "fill-mask",
    "question-answering",
    "translation",
    "conversational",
    "sentence-similarity",
    "summarization",
    "multiple-choice",
    "zero-shot-classification",
    "table-question-answering",
]

audio_tasks = [
    "automatic-speech-recognition",
    "audio-classification",
    "text-to-speech",
    "audio-to-audio",
    "voice-activity-detection",
]

cv_tasks = [
    "image-classification",
    "image-segmentation",
    "zero-shot-image-classification",
    "image-to-image",
    "unconditional-image-generation",
    "object-detection",
]

multimodal = [
    "feature-extraction",
    "text-to-image",
    "visual-question-answering",
    "image-to-text",
    "document-question-answering",
]

tabular = [
    "tabular-classification",
    "tabular-regression",
]

# Modality key -> list of pipeline tags. Insertion order is the lookup order
# for anything that iterates over this mapping.
modalities = {
    "nlp": nlp_tasks,
    "audio": audio_tasks,
    "cv": cv_tasks,
    "multimodal": multimodal,
    "tabular": tabular,
    "rl": ["reinforcement-learning"],
}
def modality(row):
    """Return the modality key for the pipeline tag stored in ``row``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``"pipeline"``
            entry.

    Returns:
        The first key of the module-level ``modalities`` mapping whose task
        list contains ``row["pipeline"]``; ``"unk_modality"`` when the
        pipeline is a string that appears in no task list; ``None`` when the
        pipeline is not a string at all (e.g. a missing value).
    """
    pipeline = row["pipeline"]
    # The loop variable is deliberately not called ``modality`` so it does
    # not shadow this function's own name.
    for name, tasks in modalities.items():
        if pipeline in tasks:
            return name
    # Must be isinstance(): ``type(pipeline) == "str"`` compares a type object
    # with a string literal and is therefore always False, which made the
    # "unk_modality" fallback unreachable.
    if isinstance(pipeline, str):
        return "unk_modality"
    return None
# The leading "@" is required: without it ``st.cache(...)`` is only an
# expression statement whose result is thrown away, so the function below was
# never cached.
@st.cache(allow_output_mutation=True)
def process_dataset(version):
    """Load the model-repo statistics at one revision as an enriched DataFrame.

    Args:
        version: Revision of the ``open-source-metrics/model-repos-stats``
            dataset, passed straight to ``load_dataset(..., revision=...)``.

    Returns:
        The ``"train"`` split as a pandas DataFrame with two extra columns:
        ``"modality"`` (result of ``modality`` applied to each row) and
        ``"length_bins"`` (``"text_length"`` bucketed with ``pd.cut``).
    """
    # Load dataset at specified revision
    dataset = load_dataset("open-source-metrics/model-repos-stats", revision=version)
    # Convert to pandas dataframe
    data = dataset["train"].to_pandas()
    # Add modality column
    data["modality"] = data.apply(modality, axis=1)
    # Bin the model card length into some bins.
    # NOTE(review): with pd.cut's default right-closed intervals, lengths of
    # exactly 0 or above 50000 fall outside every bin — confirm that is intended.
    data["length_bins"] = pd.cut(data["text_length"], [0, 200, 1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000])
    return data
def eval_tags(row):
    """Parse the ``"tags"`` entry of ``row`` into a list of tags.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``"tags"``
            entry.

    Returns:
        A list of tags:

        * ``[]`` for missing or placeholder values (``None``, ``""``,
          ``"none"``, ``"{}"``, an empty list) and for dict values, including
          strings that are dict literals;
        * the parsed list when the string is a list literal such as
          ``"['a', 'b']"``;
        * ``[tags]`` when the string is a single plain tag;
        * a shallow copy when the value is already a list or tuple.

    Raises:
        ValueError, SyntaxError: if a string starting with ``"["`` is not a
            valid Python literal.
    """
    tags = row["tags"]

    if not isinstance(tags, str):
        # Value is already parsed. Keep real sequences; anything else
        # (None, NaN, dict, ...) carries no usable tags.
        if isinstance(tags, (list, tuple)):
            return list(tags)
        return []

    # Placeholder spellings for "no tags". The empty string is included
    # because indexing ``tags[0]`` on it would raise IndexError.
    if tags in ("", "none", "{}"):
        return []

    if tags.startswith("["):
        # literal_eval only evaluates Python literals, never arbitrary code.
        return literal_eval(tags)

    if tags.startswith("{"):
        # A dict literal yields no tags. Previously such strings were wrapped
        # in a list before parsing, so the dict check could never trigger.
        try:
            parsed = literal_eval(tags)
        except (ValueError, SyntaxError, TypeError):
            parsed = None
        if isinstance(parsed, dict):
            return []

    # A single plain tag.
    return [tags]
def change_pct(old, new):
    """Return the change from ``old`` to ``new`` as a percentage of ``new``.

    The result is rounded to three decimal places.

    NOTE(review): the denominator is ``new`` rather than ``old``, and
    ``new == 0`` is not guarded — confirm both are intended.
    """
    difference = new - old
    percentage = 100 * difference / new
    return round(percentage, 3)
def change_and_delta(old_old, old, new):
    """Format the latest percentage change and how it moved since the prior one.

    Args:
        old_old: Value two periods back.
        old: Value one period back.
        new: Latest value.

    Returns:
        A ``(change, delta)`` tuple of strings, each ending in ``"%"``:
        ``change`` is ``change_pct(old, new)`` and ``delta`` is that value
        minus ``change_pct(old_old, old)``, rounded to three decimals.
    """
    latest = change_pct(old, new)
    previous = change_pct(old_old, old)
    shift = round(latest - previous, 3)
    return f"{latest}%", f"{shift}%"