Junming Yang
committed on
Commit
•
53ff1fe
1
Parent(s):
612773e
Update meta_data
Browse files- meta_data.py +5 -2
meta_data.py
CHANGED
@@ -203,14 +203,17 @@ LEADERBOARD_MD['SEEDBench2_Plus'] = """
|
|
203 |
- SEEDBench2 Plus comprises 2.3K multiple-choice questions with precise human annotations, spanning three broad categories: Charts, Maps, and Webs, each of which covers a wide spectrum of text-rich scenarios in the real world.
|
204 |
"""
|
205 |
|
206 |
-
LEADERBOARD_MD['
|
207 |
-
##
|
208 |
|
209 |
- MMT-Bench comprises 31,325 meticulously curated multi-choice visual questions from various multimodal scenarios such as vehicle driving and embodied navigation, covering 32 core meta-tasks and 162 subtasks in multimodal understanding.
|
|
|
|
|
210 |
"""
|
211 |
|
212 |
LEADERBOARD_MD['SEEDBench2'] = """
|
213 |
## SEEDBench2 Evaluation Results
|
214 |
|
215 |
- SEEDBench2 comprises 24K multiple-choice questions with accurate human annotations, which spans 27 dimensions, including the evaluation of both text and image generation.
|
|
|
216 |
"""
|
|
|
203 |
- SEEDBench2 Plus comprises 2.3K multiple-choice questions with precise human annotations, spanning three broad categories: Charts, Maps, and Webs, each of which covers a wide spectrum of text-rich scenarios in the real world.
|
204 |
"""
|
205 |
|
206 |
+
LEADERBOARD_MD['MMT-Bench_VAL'] = """
|
207 |
+
## MMT-Bench Validation Evaluation Results
|
208 |
|
209 |
- MMT-Bench comprises 31,325 meticulously curated multi-choice visual questions from various multimodal scenarios such as vehicle driving and embodied navigation, covering 32 core meta-tasks and 162 subtasks in multimodal understanding.
|
210 |
+
- MMT-Bench_VAL is the validation set of MMT-Bench. MMT-Bench_ALL includes both validation and test sets. The suffix `MI`, such as `MMT-Bench_VAL_MI`, denotes the multi-image version of the dataset, which takes several images as input.
|
211 |
+
The default version is the single-image version, which concatenates the multiple images into a single image as input.
|
212 |
"""
|
213 |
|
214 |
LEADERBOARD_MD['SEEDBench2'] = """
|
215 |
## SEEDBench2 Evaluation Results
|
216 |
|
217 |
- SEEDBench2 comprises 24K multiple-choice questions with accurate human annotations, which spans 27 dimensions, including the evaluation of both text and image generation.
|
218 |
+
- Note that we only evaluate and report part of each model's results on SEEDBench2.
|
219 |
"""
|