In [None]:
from pathlib import Path
import plotly.graph_objects as go

# Use the current working directory as the project root; every other path in
# this notebook is built relative to it.
proj_dir = Path.cwd()
# Bare expression at the end of the cell so the notebook displays the resolved path.
proj_dir

In [None]:
import os
import json
import pandas as pd

# Folder under the project root that holds the benchmark result files
dir_path = proj_dir.joinpath('tgi_benchmark_results/')


def build_df(results_dir=None):
    """Collect the benchmark summary JSON files under a results directory into one DataFrame.

    Run folders are found with the glob ``*/*_tgibs_*`` relative to the results
    directory. For each run folder, the first ``*_summary.json`` file (in sorted
    order) is loaded as a single row, and three columns are added:

    * ``tgibs`` -- integer parsed from the folder name: the text between
      ``_tgibs_`` and the next ``__`` (or the end of the name),
    * ``hw``    -- name of the run folder's parent directory,
    * ``id``    -- ``"<hw>_<tgibs>"``.

    Args:
        results_dir: Directory to scan (``str`` or ``Path``). When ``None``
            (the default) the module-level ``dir_path`` is used.

    Returns:
        pandas.DataFrame with one row per summary file, sorted by ``tgibs``
        and then ``num_concurrent_requests`` (both ascending).

    Raises:
        ValueError: if no summary file is found at all, or if the ``tgibs``
            part of a folder name is not an integer.
        KeyError: if the summaries carry no ``num_concurrent_requests`` key.
    """
    import warnings

    root = dir_path if results_dir is None else Path(results_dir)

    # One single-row dataframe per summary file, concatenated once at the end
    dfs = []

    # sorted(): glob() yields entries in arbitrary filesystem order, which would
    # make both the chosen summary file and the row order vary between runs.
    for tgibs_folder in sorted(root.glob("*/*_tgibs_*")):
        # The pattern can also match plain files; only run folders are of interest
        if not tgibs_folder.is_dir():
            continue

        summary_files = sorted(tgibs_folder.glob("*_summary.json"))
        if not summary_files:
            # Skip instead of failing with a bare IndexError, so one run folder
            # without a summary does not block loading all the others.
            warnings.warn(
                f"No *_summary.json found in {tgibs_folder}; skipping",
                stacklevel=2,
            )
            continue
        summary_file = summary_files[0]

        # Hardware name comes from the parent folder, tgibs from the folder name
        hw = tgibs_folder.parts[-2]
        tgibs_value = tgibs_folder.name.split('_tgibs_')[1].split('__')[0]

        # Load the JSON file
        with open(summary_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Convert the JSON data to a single-row pandas dataframe
        df = pd.DataFrame([data])

        # Tag the row with where it came from
        df['tgibs'] = int(tgibs_value)
        df['hw'] = hw
        df['id'] = f"{hw}_{tgibs_value}"

        dfs.append(df)

    if not dfs:
        # pd.concat([]) would raise the far less helpful "No objects to concatenate"
        raise ValueError(f"No benchmark summaries found under {root}")

    df = pd.concat(dfs, ignore_index=True)
    df = df.sort_values(by=['tgibs', 'num_concurrent_requests'], ascending=[True, True])
    return df

In [None]:
# Load one row per benchmark summary (build_df adds the 'tgibs', 'hw' and 'id' columns)
df = build_df()

# Create a figure
fig = go.Figure()

# One curve per 'id', i.e. per "<hw>_<tgibs>" combination
grouped_df = df.groupby('id')

# 'id' values whose curve gets a text label at its first point
label_batch_sizes = ['nvidia-a100_8', 'nvidia-h100_8', 'nvidia-h100-fp8_8', 'nvidia-a100_medusa_8']

# NOTE: despite its name, `batch_size` is the group's 'id' string ("<hw>_<tgibs>"),
# not a number. The name is kept so later cells that read it keep working.
for batch_size, group in grouped_df:
    # Add a latency-vs-throughput line for this group
    fig.add_trace(go.Scatter(
        x=group['results_end_to_end_latency_s_mean'],
        y=group['results_num_completed_requests_per_min'],
        mode='lines+markers',
        name=f"Batch Size: {batch_size}",  # Legend entry
        hovertemplate=(
            f"<b>Batch Size: {batch_size}</b><br>"
            "VU: %{text}<br>"
            "Latency: %{x:.2f}s<br>"
            "Throughput: %{y:.2f} reqs/min"
        ) + "<extra></extra>",
        # Hover-only (mode has no 'text'). Pass the bare count: the template
        # already supplies the "VU:" label, so a " VU" suffix here would
        # render as "VU: 8 VU".
        text=[str(v) for v in group['num_concurrent_requests']]
    ))

    # Label the first point of the selected curves with the short hardware name
    if batch_size in label_batch_sizes:
        fig.add_annotation(
            x=group['results_end_to_end_latency_s_mean'].iloc[0],
            y=group['results_num_completed_requests_per_min'].iloc[0],
            # Take the name from the 'hw' column rather than slicing two
            # characters off the id, which breaks once tgibs has two digits.
            text=group['hw'].iloc[0].replace("nvidia-", ""),
            showarrow=False,
            ax=0,
            xanchor='center',
            yanchor='top'  # anchor the text's top edge at the point, so it sits below it
        )

# Update layout for the figure
fig.update_layout(
    title_text="Requests Throughput vs Latency by Batch Size",
    xaxis_title="End to End Latency (seconds)",
    yaxis_title="Requests/min",
    showlegend=True,
)

# Show the figure
fig.show()