import re
import gradio as gr
import pandas as pd
import plotly
from pandas.api.types import is_numeric_dtype
from pipeline.config import QueriesConfig, LLMBoardConfig
from pipeline.models import models_costs
# Introductory text template for the board. The "{}" placeholder is presumably
# filled with the summarization prompt (e.g. via str.format) by code outside
# this view -- TODO confirm against the place where README is rendered.
README = """
This project analyses different models and providers from the perspective of an application developer.
Models are asked to summarize a text in different languages and using different output formats with following prompt:
{}
"""
# Pre-computed benchmark results, loaded once at import time from CSV snapshots.
summary_df: pd.DataFrame = pd.read_csv("data/2024-01-25 13:30:17.207984_summary.csv")
time_of_day_comparison_df = pd.read_csv("data/2024-01-25 13:30:20.959750_time_of_day_comparison.csv")
general_plots = pd.read_csv("data/2024-01-25 12:22:00.759762_general_plot.csv")

# Read the serialized Plotly figure with an explicit encoding so that decoding
# does not depend on the platform's locale default (JSON text is UTF-8).
with open("data/time_of_day_plot.json", "r", encoding="utf-8") as f:
    time_of_day_plot = plotly.io.from_json(f.read())

model_costs_df = models_costs()

# Module-level UI state read by dataframes() and mutated by the callbacks below.
# NOTE(review): being module globals, these look shared by every visitor of the
# app rather than kept per session -- confirm that this is intended.
searched_model_name = ""  # lower-cased substring used to filter rows by model name
collapse_languages = False  # True -> "language" is dropped from the summary grouping
collapse_output_method = False  # True -> "template_name" is dropped from the summary grouping
def filter_dataframes(input: str):
    """Remember the model-name filter and return the re-filtered tables.

    Args:
        input: Text to search for in model names; it is lower-cased before
            being stored in the module-level ``searched_model_name``.

    Returns:
        Whatever ``dataframes()`` returns after the filter has been updated.
    """
    global searched_model_name
    # The parameter name shadows the builtin ``input`` but is kept as is,
    # because it is part of the function's visible signature.
    searched_model_name = input.lower()
    return dataframes()
def collapse_languages_toggle():
    """Flip the "collapse languages" flag and refresh the summary table.

    Returns:
        A 2-tuple of the first element returned by ``dataframes()`` (the
        summary table) and the new caption for the toggle button.
    """
    global collapse_languages
    collapse_languages = not collapse_languages
    # The caption always describes the action the *next* click will perform.
    button_text = "Un-collapse languages" if collapse_languages else "Collapse languages"
    return dataframes()[0], button_text
def collapse_output_method_toggle():
    """Flip the "collapse output method" flag and refresh the summary table.

    Returns:
        A 2-tuple of the first element returned by ``dataframes()`` (the
        summary table) and the new caption for the toggle button.
    """
    global collapse_output_method
    collapse_output_method = not collapse_output_method
    # The caption always describes the action the *next* click will perform.
    button_text = "Un-collapse output method" if collapse_output_method else "Collapse output method"
    return dataframes()[0], button_text
def dataframes():
    """Build the three styled tables shown in the UI from the current state.

    The summary table is re-aggregated (mean) over the configured group
    columns, minus "language" and/or "template_name" when the corresponding
    collapse flag is set. All three tables are then restricted to rows whose
    model name contains ``searched_model_name`` (case-insensitive).

    Returns:
        A 3-tuple of styled tables: (summary, time-of-day comparison,
        model costs), each produced by ``dataframe_style``.
    """
    summary_df_columns = summary_df.columns.to_list()
    group_columns = LLMBoardConfig().group_columns.copy()

    # Collapsing a dimension means dropping it both from the selected columns
    # and from the group-by keys, so the mean is taken across its values.
    if collapse_languages:
        summary_df_columns.remove("language")
        group_columns.remove("language")
    if collapse_output_method:
        summary_df_columns.remove("template_name")
        group_columns.remove("template_name")

    summary_df_processed = summary_df[summary_df_columns].groupby(by=group_columns).mean().reset_index()

    def matches_search(model_names: pd.Series) -> pd.Series:
        # regex=False: the search text comes from a free-text box, so it must be
        # matched literally; otherwise input such as "gpt-4(" raises re.error
        # and characters like "." or "+" match unintended names.
        # na=False: missing model names count as "no match" instead of putting
        # NaN into the boolean mask, which cannot be used for row selection.
        return model_names.str.lower().str.contains(searched_model_name, regex=False, na=False)

    return (
        dataframe_style(summary_df_processed[matches_search(summary_df_processed.model)]),
        dataframe_style(time_of_day_comparison_df[matches_search(time_of_day_comparison_df.model)]),
        dataframe_style(model_costs_df[matches_search(model_costs_df.Model)]),
    )
def dataframe_style(df: pd.DataFrame):
    """Return a display-ready Styler for ``df``.

    Column names are converted from snake_case to title case, and numeric
    columns get a fixed number of decimals: four for the column originally
    named "execution_time", two for every other numeric column. Missing
    values are rendered as an empty string.

    Args:
        df: Table to present; it is copied, so the caller's frame is not
            modified.

    Returns:
        The ``Styler`` obtained from ``df.style.format(...)``.
    """
    df = df.copy()
    original_columns = list(df.columns)
    df.columns = [snake_case_to_title(column) for column in original_columns]

    column_formats = {}
    for original_name, display_name in zip(original_columns, df.columns):
        if not is_numeric_dtype(df[display_name]):
            continue
        # Compare against the *original* name: after the rename above the
        # column is called "Execution Time", so testing the display name
        # against "execution_time" could never match.
        if original_name == "execution_time":
            column_formats[display_name] = "{:.4f}"
        else:
            column_formats[display_name] = "{:.2f}"

    return df.style.format(column_formats, na_rep="")
def snake_case_to_title(text):
    """Turn a snake_case identifier into space-separated capitalized words.

    Each underscore-delimited piece is passed through ``str.capitalize``
    (first character upper-cased, the rest lower-cased), and the pieces are
    joined with single spaces; consecutive underscores therefore yield
    consecutive spaces.
    """
    return " ".join(piece.capitalize() for piece in re.split(r"_", text))
# Widgets are instantiated at module level, before the ``gr.Blocks`` layout
# that follows; presumably they are placed into the layout there -- TODO confirm.
filter_textbox = gr.Textbox(label="Model name part")
filter_button = gr.Button(value="Filter dataframes by model name")
# Initial captions match the "not collapsed" state of the module-level flags.
collapse_languages_button = gr.Button(value="Collapse languages")
collapse_output_method_button = gr.Button(value="Collapse output method")
# NOTE(review): the purpose of this counter is not visible in this part of the file.
last_textbox = 0
with gr.Blocks() as demo:
gr.HTML("