kennymckormick committed
Commit: 3307746
Parent(s): 5c63389

update app.py

Files changed:
- app.py (+1 / -1)
- meta_data.py (+3 / -2)
app.py CHANGED

@@ -22,7 +22,7 @@ with gr.Blocks() as demo:
     with gr.Tabs(elem_classes='tab-buttons') as tabs:
         with gr.TabItem('🏅 OpenVLM Main Leaderboard', elem_id='main', id=0):
             gr.Markdown(LEADERBOARD_MD['MAIN'])
-            table, check_box = BUILD_L1_DF(results,
+            table, check_box = BUILD_L1_DF(results, DEFAULT_BENCH)
             type_map = check_box['type_map']
             checkbox_group = gr.CheckboxGroup(
                 choices=check_box['all'],
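The functional change in app.py is the second argument now passed to BUILD_L1_DF: the main leaderboard table is built from DEFAULT_BENCH. As a rough orientation only, not the leaderboard's actual implementation, a BUILD_L1_DF compatible with this call and with the check_box keys used below it ('all', 'type_map') might look like the following sketch; the results layout and every other detail in it are assumptions.

# Sketch only: a BUILD_L1_DF compatible with the call in app.py, not the real code.
# Assumed input: results = {model_name: {field_name: value, ...}}.
import pandas as pd

def BUILD_L1_DF(results, fields):
    records = []
    for model, res in results.items():
        row = {'Method': model}
        for f in fields:
            row[f] = res.get(f)  # missing benchmark results stay as None
        records.append(row)
    table = pd.DataFrame(records)
    # check_box feeds the gr.CheckboxGroup: which columns can be toggled
    # ('all') and how each one should be rendered ('type_map').
    check_box = {
        'all': fields,
        'type_map': {f: 'number' for f in fields},
    }
    return table, check_box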
meta_data.py CHANGED

@@ -24,7 +24,8 @@ META_FIELDS = ['Method', 'Parameters (B)', 'Language Model', 'Vision Model', 'Op
 MAIN_FIELDS = [
     'MMBench_V11', 'MMStar', 'MME',
     'MMMU_VAL', 'MathVista', 'OCRBench', 'AI2D',
-    'HallusionBench', 'SEEDBench_IMG', 'MMVet',
+    'HallusionBench', 'SEEDBench_IMG', 'MMVet',
+    'LLaVABench', 'CCBench', 'RealWorldQA', 'POPE', 'ScienceQA_TEST'
 ]
 DEFAULT_BENCH = [
     'MMBench_V11', 'MMStar', 'MMMU_VAL', 'MathVista', 'OCRBench', 'AI2D',

@@ -43,7 +44,7 @@ LEADERBOARD_MD['MAIN'] = f"""
 - Metrics:
   - Avg Score: The average score on all VLM Benchmarks (normalized to 0 - 100, the higher the better).
   - Avg Rank: The average rank on all VLM Benchmarks (the lower the better).
-  - Avg Score & Rank are calculated based on selected benchmark.
+  - Avg Score & Rank are calculated based on selected benchmark. **When results for some selected benchmarks are missing, Avg Score / Rank will be None!!!**
 - By default, we present the overall evaluation results based on {len(DEFAULT_BENCH)} VLM benchmarks, sorted by the descending order of Avg Score.
 - The following datasets are included in the main results: {', '.join(DEFAULT_BENCH)}.
 - Detailed evaluation results for each dataset (included or not included in main) are provided in the consequent tabs.
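The new markdown line in LEADERBOARD_MD['MAIN'] states that Avg Score / Avg Rank become None when a selected benchmark has no result for a model. A minimal sketch of that rule, assuming a pandas table with one row per model and one numeric column per benchmark; the function name and column labels are hypothetical, not the leaderboard's code.

# Sketch only: Avg Score / Avg Rank over the selected benchmarks,
# set to NaN (displayed as None) when any selected benchmark result is missing.
import pandas as pd

def add_avg_columns(table: pd.DataFrame, selected: list) -> pd.DataFrame:
    scores = table[selected]
    ranks = scores.rank(ascending=False, method='min')  # rank 1 = best per benchmark
    complete = scores.notna().all(axis=1)               # every selected benchmark present?
    table['Avg Score'] = scores.mean(axis=1).where(complete).round(1)
    table['Avg Rank'] = ranks.mean(axis=1).where(complete).round(2)
    return table.sort_values('Avg Score', ascending=False, na_position='last')

Under this rule a model missing even one of the selected benchmarks sorts to the bottom with blank averages, instead of being averaged over only the benchmarks it does have.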