import datetime
import os
from datetime import timedelta
import argilla as rg
import gradio as gr
import pandas as pd
import plotly.colors as colors
import plotly.graph_objects as go
from cachetools import TTLCache, cached
client = rg.Argilla(
api_url=os.getenv("ARGILLA_API_URL"), api_key=os.getenv("ARGILLA_API_KEY")
)
cache = TTLCache(maxsize=100, ttl=timedelta(minutes=10), timer=datetime.now)
@cached(cache)
def fetch_data(dataset_name: str, workspace: str):
return client.datasets(dataset_name, workspace=workspace)
def get_progress(dataset) -> dict:
records = list(dataset.records)
total_records = len(records)
annotated_records = len(
[record.status for record in records if record.status == "completed"]
)
progress = (annotated_records / total_records) * 100 if total_records > 0 else 0
return {
"total": total_records,
"annotated": annotated_records,
"progress": progress,
}
def get_leaderboard(dataset) -> dict:
user_annotations = {}
for record in dataset.records:
for response in record.responses:
user = response.user_id
retrieved_user = client.users(id=user)
user = retrieved_user.username
if user not in user_annotations:
user_annotations[user] = 0
user_annotations[user] += 1
print(user_annotations)
return user_annotations
def create_gauge_chart(progress):
fig = go.Figure(
go.Indicator(
mode="gauge+number+delta",
value=progress["progress"],
title={"text": "Dataset Annotation Progress", "font": {"size": 24}},
delta={"reference": 100, "increasing": {"color": "RebeccaPurple"}},
number={"font": {"size": 40}, "valueformat": ".1f", "suffix": "%"},
gauge={
"axis": {"range": [None, 100], "tickwidth": 1, "tickcolor": "darkblue"},
"bar": {"color": "deepskyblue"},
"bgcolor": "white",
"borderwidth": 2,
"bordercolor": "gray",
"steps": [
{"range": [0, progress["progress"]], "color": "royalblue"},
{"range": [progress["progress"], 100], "color": "lightgray"},
],
"threshold": {
"line": {"color": "red", "width": 4},
"thickness": 0.75,
"value": 100,
},
},
)
)
fig.update_layout(
annotations=[
dict(
text=(
f"Total records: {progress['total']}
"
f"Annotated: {progress['annotated']} ({progress['progress']:.1f}%)
"
f"Remaining: {progress['total'] - progress['annotated']} ({100 - progress['progress']:.1f}%)"
),
# x=0.5,
# y=-0.2,
showarrow=False,
xref="paper",
yref="paper",
font=dict(size=16),
)
],
)
fig.add_annotation(
text=(
f"Current Progress: {progress['progress']:.1f}% complete
"
f"({progress['annotated']} out of {progress['total']} records annotated)"
),
xref="paper",
yref="paper",
x=0.5,
y=1.1,
showarrow=False,
font=dict(size=18),
align="center",
)
return fig
def create_treemap(user_annotations, total_records):
sorted_users = sorted(user_annotations.items(), key=lambda x: x[1], reverse=True)
color_scale = colors.qualitative.Pastel + colors.qualitative.Set3
labels, parents, values, text, user_colors = [], [], [], [], []
for i, (user, contribution) in enumerate(sorted_users):
percentage = (contribution / total_records) * 100
labels.append(user)
parents.append("Annotations")
values.append(contribution)
text.append(f"{contribution} annotations
{percentage:.2f}%")
user_colors.append(color_scale[i % len(color_scale)])
labels.append("Annotations")
parents.append("")
values.append(total_records)
text.append(f"Total: {total_records} annotations")
user_colors.append("#FFFFFF")
fig = go.Figure(
go.Treemap(
labels=labels,
parents=parents,
values=values,
text=text,
textinfo="label+text",
hoverinfo="label+text+value",
marker=dict(colors=user_colors, line=dict(width=2)),
)
)
fig.update_layout(
title_text="User contributions to the total end dataset",
height=500,
margin=dict(l=10, r=10, t=50, b=10),
paper_bgcolor="#F0F0F0", # Light gray background
plot_bgcolor="#F0F0F0", # Light gray background
)
return fig
def update_dashboard():
dataset = fetch_data(os.getenv("DATASET_NAME"), os.getenv("WORKSPACE"))
progress = get_progress(dataset)
user_annotations = get_leaderboard(dataset)
gauge_chart = create_gauge_chart(progress)
treemap = create_treemap(user_annotations, progress["total"])
leaderboard_df = pd.DataFrame(
list(user_annotations.items()), columns=["User", "Annotations"]
)
leaderboard_df = leaderboard_df.sort_values(
"Annotations", ascending=False
).reset_index(drop=True)
return gauge_chart, treemap, leaderboard_df
with gr.Blocks() as demo:
gr.Markdown("# Argilla Dataset Dashboard")
with gr.Row():
gauge_output = gr.Plot(label="Overall Progress")
treemap_output = gr.Plot(label="User contributions")
with gr.Row():
leaderboard_output = gr.Dataframe(
label="Leaderboard", headers=["User", "Annotations"]
)
demo.load(
update_dashboard,
inputs=None,
outputs=[gauge_output, treemap_output, leaderboard_output],
)
gr.Button("Refresh").click(
update_dashboard,
inputs=None,
outputs=[gauge_output, treemap_output, leaderboard_output],
)
if __name__ == "__main__":
demo.launch()