"""Gradio entry point for the leaderboard app.

Builds a ``gr.Blocks`` UI with one top-level tab per entry of
``MACHINE_TO_HARDWARE`` (each holding a control panel, a leaderboard table
and a latency/score/memory plot), followed by an "About" tab and a
citation accordion. Running the module as a script queues and launches
the demo.
"""

import gradio as gr

from src.assets import custom_css

# from src.attention import create_attn_plots
from src.content import ABOUT, CITATION_BUTTON, CITATION_BUTTON_LABEL, LOGO, TITLE
from src.leaderboard import create_leaderboard_table
from src.llm_perf import get_llm_perf_df
from src.map import create_lat_score_mem_plot
from src.panel import (
    create_control_callback,
    create_control_panel,
    create_select_callback,
)

# from custom_kernels import create_quant_krnl_plots

# Maps the machine key passed to the data/panel helpers to the label shown
# on the corresponding hardware tab.
MACHINE_TO_HARDWARE = {
    "1xA10": "A10-24GB-150W 🖥️",
    "1xA100": "A100-80GB-275W 🖥️",
    "1xT4": "T4-16GB-70W 🖥️",
    "intel": "4th-Gen-Intel-Xeon-385W 🖥️",
    # "1xH100": "H100-80GB-700W 🖥️",
}


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` (not `id`) so the builtin is not shadowed at module scope.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    kernels_checkboxes,
                ) = create_control_panel(machine=machine)
                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # Loaded once per machine and shared by both subtabs.
                    open_llm_perf_df = get_llm_perf_df(machine=machine)
                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        search_bar, columns_checkboxes, leaderboard_table = (
                            create_leaderboard_table(open_llm_perf_df)
                        )
                    with gr.TabItem("Find Your Best Model 🧭", id=1):
                        lat_score_mem_plot = create_lat_score_mem_plot(open_llm_perf_df)
                    ###################### ATTENTIONS SPEEDUP TAB #######################
                    # with gr.TabItem("Attention 📈", id=2):
                    #     attn_prefill_plot, attn_decode_plot = create_attn_plots(
                    #         open_llm_perf_df
                    #     )
                    # ####################### KERNELS SPEEDUP TAB #######################
                    # with gr.TabItem("Kernels 📈", id=4):
                    #     quant_krnl_prefill_plot, quant_krnl_decode_plot = (
                    #         create_quant_krnl_plots(llm_perf_df)
                    #     )
                ####################### CONTROL CALLBACK #######################
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    kernels_checkboxes,
                    # interactive
                    columns_checkboxes,
                    search_bar,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    # attn_prefill_plot,
                    # attn_decode_plot,
                    # quant_krnl_prefill_plot,
                    # quant_krnl_decode_plot,
                )

                create_select_callback(
                    # inputs
                    machine_textbox,
                    # interactive
                    columns_checkboxes,
                    search_bar,
                    # outputs
                    leaderboard_table,
                )

        ####################### ABOUT TAB #######################
        # Hardware tabs occupy ids 0..len(MACHINE_TO_HARDWARE)-1, so the About
        # tab takes the next free id. A hard-coded 3 collided with the fourth
        # hardware tab once MACHINE_TO_HARDWARE grew to four entries.
        with gr.TabItem("About 📖", id=len(MACHINE_TO_HARDWARE)):
            gr.Markdown(ABOUT, elem_classes="descriptive-text")
    ####################### CITATION
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )

if __name__ == "__main__":
    # Launch demo
    demo.queue().launch()