""")
with gr.Tab("Existing Results"):
    # Optional column groups the viewer can toggle on.
    with gr.Row():
        show_duration = gr.Checkbox(
            label="Show Test Set Accuracy by Duration Groups",
            value=False,
        )
        show_category = gr.Checkbox(
            label="Show Test Set Accuracy by Question Categories",
            value=False,
        )

    # Free-text name of the column used to rank the table.
    key_input = gr.Textbox(
        label="Rank LMMs by column:",
        placeholder="Test Total (default)",
    )

    # Initial view: ranked by 'Test Total' with both optional groups hidden.
    initial_table = sort_data('Test Total', show_duration=False, show_category=False)
    data_frame = gr.DataFrame(initial_table)

    def update_data_frame(sort_key, with_duration, with_category):
        """Re-rank the leaderboard table from the current control values.

        Receives the textbox content and the two checkbox states
        positionally and forwards them unchanged to ``sort_data``.
        """
        return sort_data(sort_key, with_duration, with_category)

    # Any control change refreshes the table using all three current values.
    refresh_inputs = [key_input, show_duration, show_category]
    for control in refresh_inputs:
        control.change(update_data_frame, inputs=refresh_inputs, outputs=data_frame)

    gr.Markdown("Models are evaluated using their optimal #max frames, capped at 256 frames.")
with gr.Tab("Submit!"):
    # Static submission instructions rendered as Markdown inside the tab.
    submission_instructions = '''The answer of validation set of LongVideoBench is public now. Please see our [released dataset](https://huggingface.co/datasets/longvideobench/LongVideoBench) for more information.
For test set, please prepare your output as follows:
```python
{VIDEO_ID_0: "A", VIDEO_ID_1: "D", ...} # Please make sure your submission only contains the letter of model's choice, or starts with the letter of model's choice.
```
and submit to us as a JSON file.
Please prepare an email to `haoning001@e.ntu.edu.sg` titled [LongVideoBench-Submission-YOURNAME] to submit and obtain your results.
_We will launch an automatic submission server soon._'''
    gr.Markdown(submission_instructions)
# Start serving the app. NOTE(review): `block` is presumably the gr.Blocks
# container opened before this chunk of the file; confirm against its definition.
block.launch()