# Spaces: mlabonne / Yet_Another_LLM_Leaderboard
# Running
# File size: 3,605 Bytes
# b338d34

import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go

from yall import create_yall


def convert_markdown_table_to_dataframe(md_content):
    """
    Convert a markdown table into a Pandas DataFrame.

    The outer ``|`` characters of every line are removed, the remaining text
    is parsed with ``|`` as the column separator, and the markdown separator
    row (the first row after the header) is dropped. Column names are
    stripped of surrounding whitespace; cell values are left as parsed.

    A ``Model`` cell of the form ``[name](url) [text](other_url)`` is split:
    the first link's target goes to a new ``URL`` column and the cell itself
    is reduced to the first link's text. Cells that do not match that shape
    (including missing/non-string cells) keep their value and get ``None``
    as URL.

    Args:
        md_content: Markdown source of the table, header line first.

    Returns:
        The parsed DataFrame with an added ``URL`` column.
    """
    # Remove leading and trailing | characters on every line
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # A raw string keeps the regex separator free of invalid-escape warnings
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

    # Row 0 is the markdown header separator (---|---), not data
    df = df.drop(0, axis=0)

    # Strip whitespace from column names
    df.columns = df.columns.str.strip()

    # Model link followed by a second link; group 1 = link text, group 2 = URL
    model_link = re.compile(r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)')

    def extract_url(cell):
        # Empty cells are parsed as NaN (a float), which the regex cannot scan
        if not isinstance(cell, str):
            return None
        match = model_link.search(cell)
        return match.group(2) if match else None

    def extract_name(cell):
        if not isinstance(cell, str):
            return cell
        return model_link.sub(r'\1', cell)

    # URL must be computed before the Model column is rewritten
    df['URL'] = df['Model'].apply(extract_url)
    df['Model'] = df['Model'].apply(extract_name)

    return df


def create_bar_chart(df, category):
    """
    Render a heading and a horizontal bar chart of one score column.

    Rows are ordered by ascending ``category`` value; each bar is labelled
    with the row's ``Model`` value and coloured by its score on the Magma
    colour scale.
    """
    st.write(f"### {category} Scores")

    # Keep only the two columns the chart needs, ordered by score
    ranked = df[['Model', category]].sort_values(category)
    scores = ranked[category]

    # Horizontal bars: score on x, model name on y, colour follows the score
    bars = go.Bar(
        x=scores,
        y=ranked['Model'],
        orientation='h',
        marker={'color': scores, 'colorscale': 'Magma'},
    )
    fig = go.Figure(bars)

    # Axis titles plus tight margins
    fig.update_layout(
        xaxis_title=category,
        yaxis_title="Model",
        margin={'l': 20, 'r': 20, 't': 20, 'b': 20},
    )

    st.plotly_chart(fig, use_container_width=True)

    
def main():
    """
    Build the leaderboard page.

    Sets the page config, title and intro text, then obtains the markdown
    table from ``create_yall()``. If nothing is returned, an error is shown.
    Otherwise the table is parsed, the score columns are converted to
    numbers (unparseable values become NaN), the table is displayed, and one
    bar chart per score column is drawn: the first at full width, the rest
    two per row. Any exception raised while processing is reported on the
    page instead of propagating.
    """
    st.set_page_config(page_title="YALL - Yet Another LLM Leaderboard", layout="wide")

    st.title("🏆 YALL - Yet Another LLM Leaderboard")
    st.markdown("Leaderboard made with [🧐 LLM AutoEval](https://github.com/mlabonne/llm-autoeval) using [Nous](https://huggingface.co/NousResearch) benchmark suite. It's a collection of my own evaluations.")

    content = create_yall()
    if not content:
        st.error("Failed to download the content from the URL provided.")
        return

    try:
        score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']

        # Parse the table and turn the score cells into numbers before display
        df = convert_markdown_table_to_dataframe(content)
        for name in score_columns:
            trimmed = df[name].str.strip()
            df[name] = pd.to_numeric(trimmed, errors='coerce')
        st.dataframe(df, use_container_width=True)

        # The first category gets a full-width plot
        headline, *remaining = score_columns
        create_bar_chart(df, headline)

        # The remaining categories are laid out side by side, two per row
        for left_name, right_name in zip(remaining[0::2], remaining[1::2]):
            left, right = st.columns(2)
            with left:
                create_bar_chart(df, left_name)
            with right:
                create_bar_chart(df, right_name)

    except Exception as e:
        st.error("An error occurred while processing the markdown table.")
        st.error(str(e))

# Call main() only when this file is executed as the entry script, not when imported.
if __name__ == "__main__":
    main()