Spaces:

Inferless
/

LLM-Inference-Benchmark

Running

File size: 6,618 Bytes

import gradio as gr
import pandas as pd
import requests
from io import StringIO

# Static HTML snippets rendered with gr.HTML in the layout further down.
# DESCRIPTION is the page title: a single centered <h2> heading.
DESCRIPTION = """
<h2 style='text-align: center; color: #00480a !important; text-shadow: 2px 2px 4px rgba(0,0,0,0.1);'>🚀 LLM Inference Leaderboard: Pushing the Boundaries of Performance 🚀</h2>
"""

# Intro panel shown under the title: a styled <div> that lists the six
# inference engines covered and states the hardware used for the runs.
INTRODUCTION = """
<div style='background-color: #e6ffd9; padding: 20px; border-radius: 15px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>🔬 Our Exciting Quest</h3>
<p style='color: #00480a;'>We're on a thrilling journey to help developers discover the perfect LLMs and libraries for their innovative projects! We've put these models through their paces using six cutting-edge inference engines:</p>
<ul style='color: #00480a;'>
    <li>🚄 vLLM</li>
    <li>🌟 TGI</li>
    <li>⚡ TensorRT-LLM</li>
    <li>🔮 Tritonvllm</li>
    <li>🚀 Deepspeed-mii</li>
    <li>🎯 ctranslate</li>
</ul>
<p style='color: #00480a;'>All our tests were conducted on state-of-the-art A100 GPUs hosted on Azure, ensuring a fair and neutral battleground!</p>
<p style='color: #00480a; font-weight: bold;'>Our mission: Empower developers, researchers, and AI enthusiasts to find their perfect LLM match for both development and production environments!</p>
</div>
"""

# Methodology panel rendered below the leaderboard table. Two <div> cards:
# the first describes the test setup (platform, containers, sampling
# settings, prompt/token ranges, models and libraries); the second links to
# the accompanying blog posts.
HOW_WE_TESTED = """
<div style='background-color: #cbff4d; padding: 20px; border-radius: 15px; margin-top: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>🧪 Our Rigorous Testing Process</h3>
<p style='color: #00480a;'>We left no stone unturned in our quest for reliable benchmarks:</p>
<ul style='color: #00480a;'>
    <li><strong>🖥️ Platform:</strong> A100 GPUs from Azure - the ultimate testing ground!</li>
    <li><strong>🐳 Setup:</strong> Docker containers for each library, ensuring a pristine environment.</li>
    <li><strong>⚙️ Configuration:</strong> Standardized settings (temperature 0.5, top_p 1) for laser-focused performance comparisons.</li>
    <li><strong>📊 Prompts & Token Ranges:</strong> Six diverse prompts, input lengths from 20 to 2,000 tokens, and generation lengths of 100, 200, and 500 tokens - pushing the boundaries of flexibility!</li>
    <li><strong>🤖 Models & Libraries Tested:</strong> We put the best through their paces: Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it, using TGI, vLLM, DeepSpeed Mii, CTranslate2, Triton with vLLM Backend, and TensorRT-LLM.</li>
</ul>
</div>
<div style='background-color: #e6ffd9; padding: 20px; border-radius: 15px; margin-top: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>🔗 Additional Resources</h3>
<p style='color: #00480a;'>For a deeper dive into LLM speed benchmarks and independent analysis, check out these complete blogs:</p>
<ul style='color: #00480a;'>
    <li><a href="https://www.inferless.com/learn/exploring-llms-speed-benchmarks-independent-analysis---part-3" target="_blank" style="color: #006400;">Exploring LLMs Speed Benchmarks: Independent Analysis - Part 3</a></li>
    <li><a href="https://www.inferless.com/learn/exploring-llms-speed-benchmarks-independent-analysis---part-2" target="_blank" style="color: #006400;">Exploring LLMs Speed Benchmarks: Independent Analysis - Part 2</a></li>
    <li><a href="https://www.inferless.com/learn/exploring-llms-speed-benchmarks-independent-analysis" target="_blank" style="color: #006400;">Exploring LLMs Speed Benchmarks: Independent Analysis</a></li>
</ul>
</div>
"""

# Default location of the benchmark results: an ``hf://`` dataset path that
# is handed straight to ``pd.read_csv``.
CSV_URL = "hf://datasets/rbgo/llm-inference-benchmark/LLM-inference-benchmark-3.csv"


def load_and_process_csv(source=CSV_URL):
    """Load the benchmark CSV and return its leaderboard columns in display order.

    Args:
        source: Anything ``pd.read_csv`` accepts (path, URL, or file-like
            object). Defaults to ``CSV_URL``.

    Returns:
        A DataFrame containing exactly the leaderboard columns, in a fixed
        order. A column that is absent from the CSV is added and filled with
        ``pd.NA``; any other CSV columns are dropped.
    """
    columns_order = [
        "Model_Name",
        "Library",
        "TTFT",
        "Tokens-per-Second",
        "Latency",
        "Token_Count",
        "input_length",
        "output_length",
    ]

    frame = pd.read_csv(source)

    # Backfill columns the CSV lacks so the column selection below cannot
    # raise KeyError when the dataset schema is missing a field.
    for col in columns_order:
        if col not in frame.columns:
            frame[col] = pd.NA

    return frame[columns_order]

# Load the benchmark table once, at import time; the functions and the UI
# layout below all read this module-level frame.
df = load_and_process_csv()

def get_leaderboard_df() -> pd.DataFrame:
    """Return the module-level benchmark DataFrame as-is (no filtering, no copy)."""
    return df

def filter_and_search(model_filter, library_filter, data=None):
    """Return the leaderboard rows matching the selected model and library.

    Args:
        model_filter: Value to match exactly against the ``Model_Name``
            column. ``"All"`` or ``None`` (nothing selected) disables this
            filter.
        library_filter: Value to match exactly against the ``Library``
            column. ``"All"`` or ``None`` disables this filter.
        data: DataFrame to filter. Defaults to the module-level ``df``.

    Returns:
        A new DataFrame with the matching rows, keeping their original index
        labels. The input frame is never modified.
    """
    frame = df if data is None else data

    # Combine both conditions into one boolean mask instead of deep-copying
    # the whole frame up front and then slicing it twice.
    keep = pd.Series(True, index=frame.index, dtype=bool)
    for column, wanted in (("Model_Name", model_filter), ("Library", library_filter)):
        # A missing selection means "no filter"; comparing a column against
        # None would match nothing and produce an empty table.
        if wanted is None or wanted == "All":
            continue
        keep &= frame[column] == wanted

    return frame[keep]

# Stylesheet passed to gr.Blocks(css=...). Applies the green palette used by
# the HTML panels (#00480a / #006400 / #cbff4d / #e6ffd9) to the page body,
# prose text, tab bar, tab buttons, and primary buttons, and caps the
# container width at 1200px.
custom_css = """
body {
    background-color: #f0fff0;
    font-family: 'Roboto', sans-serif;
}
.gradio-container {
    max-width: 1200px !important;
}
.gradio-container .prose * {
    color: #00480a !important;
}
.gradio-container .prose h2,
.gradio-container .prose h3 {
    color: #00480a !important;
}
.tabs {
    background-color: #e6ffd9;
    border-radius: 15px;
    overflow: hidden;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.tab-nav {
    background-color: #00480a;
    padding: 10px;
}
.tab-nav button {
    color: #cbff4d !important;
    background-color: #006400;
    border: none;
    padding: 10px 20px;
    margin-right: 5px;
    border-radius: 10px;
    cursor: pointer;
    transition: all 0.3s ease;
}
.tab-nav button:hover {
    background-color: #cbff4d;
    color: #00480a !important;
}
.tab-nav button.selected {
    background-color: #cbff4d;
    color: #00480a !important;
    font-weight: bold;
}
.gr-button-primary {
    background-color: #00480a !important;
    border-color: #00480a !important;
    color: #cbff4d !important;
}
.gr-button-primary:hover {
    background-color: #cbff4d !important;
    color: #00480a !important;
}
"""

# Page layout: title and intro panels on top, then a single "Leaderboard" tab
# holding the two filter dropdowns, the results table, and the methodology
# panel.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(DESCRIPTION)
    gr.HTML(INTRODUCTION)

    with gr.Tabs():
        with gr.TabItem("📊 Leaderboard"):
            with gr.Row():
                # Each dropdown offers "All" followed by the distinct values
                # found in the corresponding column.
                model_dropdown = gr.Dropdown(
                    choices=["All", *df['Model_Name'].unique().tolist()],
                    label="🏷️ Filter by Model_Name",
                    value="All",
                )
                library_dropdown = gr.Dropdown(
                    choices=["All", *df['Library'].unique().tolist()],
                    label="🏷️ Filter by Library",
                    value="All",
                )

            leaderboard = gr.DataFrame(df)

            gr.HTML(HOW_WE_TESTED)

    # Changing either dropdown re-filters the table using both current values.
    for dropdown in (model_dropdown, library_dropdown):
        dropdown.change(
            filter_and_search,
            inputs=[model_dropdown, library_dropdown],
            outputs=leaderboard,
        )

    # Populate the table with the unfiltered data whenever the page loads.
    demo.load(get_leaderboard_df, outputs=[leaderboard])

if __name__ == "__main__":
    demo.launch()