Upload 15 files
- __pycache__/constants.cpython-38.pyc +0 -0
- app.py +62 -37
- constants.py +10 -5
- file/result.csv +35 -35
- file/result_task.csv +46 -0
- file/result_v2.csv +28 -28
- file/result_v2_task.csv +28 -0
- src/__pycache__/utils_display.cpython-38.pyc +0 -0
- src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc +0 -0
__pycache__/constants.cpython-38.pyc
CHANGED
Binary files a/__pycache__/constants.cpython-38.pyc and b/__pycache__/constants.cpython-38.pyc differ
app.py
CHANGED
@@ -289,17 +289,22 @@ def add_new_eval(
     csv_data = csv_data.to_csv(CSV_V2_DIR, index=False)
     return 0
 
-def get_baseline_df():
+def get_baseline_df(average_type):
+    if average_type == 'All Average':
+        df = pd.read_csv(CSV_DIR)
+    else:
+        df = pd.read_csv(CSV_TASK_DIR)
     df = df.sort_values(by="Avg. All", ascending=False)
     present_columns = MODEL_INFO + checkbox_group.value
     df = df[present_columns]
     return df
 
-def get_baseline_v2_df():
+def get_baseline_v2_df(average_type):
     # pdb.set_trace()
+    if average_type == 'All Average':
+        df = pd.read_csv(CSV_V2_DIR)
+    else:
+        df = pd.read_csv(CSV_V2_TASK_DIR)
     df = df.sort_values(by="Avg. P1", ascending=False)
     present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
     # pdb.set_trace()
@@ -316,6 +321,10 @@ def get_all_v2_df():
     df = df.sort_values(by="Avg. P1", ascending=False)
     return df
 
+
+def switch_version(version):
+    return f"当前版本: {version}"
+
 block = gr.Blocks()
 
 
@@ -345,25 +354,28 @@ with block:
         interactive=True,
     )
 
+    with gr.Row():
+        # selection for model size part:
+        model_size_v2 = gr.CheckboxGroup(
+            choices=MODEL_SIZE,
+            value=MODEL_SIZE,
+            label="Model Size",
+            interactive=True,
+        )
+
+        # selection for model size part:
+        evaluation_method_v2 = gr.CheckboxGroup(
+            choices=EVALUATION_METHOD,
+            value=EVALUATION_METHOD,
+            label="Evaluation Method",
+            interactive=True,
+        )
+
+    average_type_v2 = gr.Radio(AVERAGE_TYPE, label="Performance Average Type", value="All Average")
 
     # 创建数据帧组件
     data_component_v2 = gr.components.Dataframe(
-        value=get_baseline_v2_df,
+        value=get_baseline_v2_df(average_type_v2.value),
         headers=COLUMN_V2_NAMES,
         type="pandas",
         datatype=DATA_TITILE_V2_TYPE,
@@ -415,7 +427,11 @@ with block:
         # pdb.set_trace()
 
         return filter_component.value
+
+    def on_average_type_v2_change(average_type_v2):
+        return get_baseline_v2_df(average_type_v2)
 
+    average_type_v2.change(fn=on_average_type_v2_change, inputs=[average_type_v2], outputs=data_component_v2)
     model_size_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
     evaluation_method_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
     checkbox_group_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
@@ -442,31 +458,36 @@ with block:
         interactive=True,
     )
 
-    # selection for model size part:
-    evaluation_method = gr.CheckboxGroup(
-        choices=EVALUATION_METHOD,
-        value=EVALUATION_METHOD,
-        label="Evaluation Method",
-        interactive=True,
-    )
+    with gr.Row():
+        # selection for model size part:
+        model_size = gr.CheckboxGroup(
+            choices=MODEL_SIZE,
+            value=MODEL_SIZE,
+            label="Model Size",
+            interactive=True,
+        )
+
+        # selection for model size part:
+        evaluation_method = gr.CheckboxGroup(
+            choices=EVALUATION_METHOD,
+            value=EVALUATION_METHOD,
+            label="Evaluation Method",
+            interactive=True,
+        )
+
+    average_type = gr.Radio(AVERAGE_TYPE, label="Performance Average Type", value="All Average")
 
     # 创建数据帧组件
     data_component = gr.components.Dataframe(
-        value=get_baseline_df,
+        value=get_baseline_df(average_type.value),
        headers=COLUMN_NAMES,
        type="pandas",
        datatype=DATA_TITILE_TYPE,
        interactive=False,
        visible=True,
    )
+
 
     def on_filter_model_size_method_change(selected_model_size, selected_evaluation_method, selected_columns):
 
@@ -512,7 +533,11 @@ with block:
         # pdb.set_trace()
 
         return filter_component.value
+
+    def on_average_type_change(average_type):
+        return get_baseline_df(average_type)
 
+    average_type.change(fn=on_average_type_change, inputs=[average_type], outputs=data_component)
     model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
     evaluation_method.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
     checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
@@ -625,8 +650,8 @@ with block:
 
 
     def refresh_data():
-        value1 = get_baseline_df()
-        value2 = get_baseline_v2_df()
+        value1 = get_baseline_df(average_type)
+        value2 = get_baseline_v2_df(average_type_v2)
 
         return value1, value2
 
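The app.py changes above wire a `gr.Radio` ("Performance Average Type") to the leaderboard tables: the radio's value decides which CSV backs `get_baseline_df` / `get_baseline_v2_df`, and a `.change` handler re-renders the `gr.Dataframe`. Below is a minimal, self-contained sketch of that pattern, not the Space's actual code; the two inlined frames stand in for ./file/result.csv and ./file/result_task.csv.

```python
# Minimal sketch of the radio-driven table refresh added in this commit.
# The inlined frames stand in for ./file/result.csv and ./file/result_task.csv.
import gradio as gr
import pandas as pd

ALL_AVERAGE = pd.DataFrame({"Model": ["A", "B"], "Avg. All": [61.6, 58.2]})
TASK_AVERAGE = pd.DataFrame({"Model": ["A", "B"], "Avg. All": [60.7, 55.6]})

def get_baseline_df(average_type):
    # Same idea as app.py: pick the backing table by average type, sort by the average column.
    df = ALL_AVERAGE if average_type == "All Average" else TASK_AVERAGE
    return df.sort_values(by="Avg. All", ascending=False)

with gr.Blocks() as demo:
    average_type = gr.Radio(["All Average", "Task Average"],
                            label="Performance Average Type", value="All Average")
    table = gr.Dataframe(value=get_baseline_df("All Average"), interactive=False)
    # Re-render the table whenever the radio selection changes.
    average_type.change(fn=get_baseline_df, inputs=[average_type], outputs=table)

if __name__ == "__main__":
    demo.launch()
```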
constants.py
CHANGED
@@ -6,15 +6,17 @@ EVALUATION_METHOD = ["PPL", "PPL for A/B/C/D", "Generate", "NG"]
 DIMENSION_LEVEL = ["L1", "L2", "L3"]
 LEADERBOARD_VERSION = ["Version1", "Version2"]
 TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
-TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
+TASK_V2_INFO = ["Avg. Single", "Avg. Multi", "Avg. Video", "Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
 
 AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
-AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]
+AVG_V2_INFO = ["Avg. Single", "Avg. Multi", "Avg. Video", "Avg. P1", "Avg. P2", "Avg. P3"]
 
 DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
-DATA_TITILE_V2_TYPE = ["markdown", "markdown","markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
+DATA_TITILE_V2_TYPE = ["markdown", "markdown","markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
 CSV_DIR = "./file/result.csv"
+CSV_TASK_DIR = './file/result_task.csv'
 CSV_V2_DIR = "./file/result_v2.csv"
+CSV_V2_TASK_DIR = './file/result_v2_task.csv'
 
 COLUMN_NAMES = MODEL_INFO + TASK_INFO
 COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
@@ -22,6 +24,9 @@ COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
 DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1509, 1225, 1023]
 DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]
 
+LEADERBORAD_VERSION = ["SEED-Bench-1", "SEED-Bench-2"]
+AVERAGE_TYPE = ["All Average", "Task Average"]
+
 LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard
 
 Welcome to the leaderboard of the SEED-Bench! 🏆
@@ -81,9 +86,9 @@ SUBMIT_INTRODUCTION = """# Submit on SEED Benchmark Introduction
 TABLE_INTRODUCTION = """In the table below, we summarize each task performance of all the models.
 We use accuracy (%) as the primary evaluation metric for each task.
 
+When the Performance Average Type is "All Average", the overall accuracy is computed by dividing the total number of correct QA answers by the total number of QA questions.
 
+When the Performance Average Type is "Task Average", the overall accuracy is the average of the per-dimension accuracies.
 
 For the PPL evaluation method, we compute the loss for each candidate and select the candidate with the lowest loss. For details, please refer to [InternLM_Xcomposer_VL_interface](https://github.com/AILab-CVC/SEED-Bench/blob/387a067b6ba99ae5e8231f39ae2d2e453765765c/SEED-Bench-2/model/InternLM_Xcomposer_VL_interface.py#L74).
 
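The two sentences added to TABLE_INTRODUCTION distinguish a micro-average ("All Average") from a macro-average over evaluation dimensions ("Task Average"). A small sketch of the difference, using made-up per-task counts (the real per-dimension question totals are the DATA_NUM / DATA_NUM_V2 constants):

```python
# Illustrative only: two tasks with made-up correct/total counts.
correct = {"Scene Understanding": 720, "Action Recognition": 430}
total = {"Scene Understanding": 978, "Action Recognition": 1509}

# "All Average": micro-average over every QA item.
all_average = 100 * sum(correct.values()) / sum(total.values())

# "Task Average": macro-average of the per-dimension accuracies.
per_task = [100 * correct[t] / total[t] for t in correct]
task_average = sum(per_task) / len(per_task)

print(round(all_average, 1), round(task_average, 1))  # 46.2 51.1
```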
file/result.csv
CHANGED
@@ -1,46 +1,46 @@
 Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
+LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,27.7,27.3,28.6,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
 LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
+LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
+ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
 ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
 ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
+ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,61.6,68.2,42.7,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
+ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,62.4,68.2,40.5,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36,35.7
+ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,62.8,68.9,39.5,75.2,71.4,72,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
 ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
+ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
+ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
+ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
 ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,39.7,42.9,30.6,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,40.9,42.7,35.7,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
+ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,32.7,35.2,25.8,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
+ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
+ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,34,37.9,23,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
+ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,50,54.4,37.5,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
 ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,58.2,65.4,37.8,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
+ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,56.3,62.3,39.1,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
+ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,66.8,72.7,44.7,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41
+ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
+ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
+ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
+ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20
+ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46,38.7,32.9
 ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,0,67,0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50,0,0,0
+ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,62.3,69.6,41.5,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
+ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,0,69.7,0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
+ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,0,70.8,0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0,0,0
+ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,0,68.6,0,75.4,72.8,69,64.5,60.6,55.1,72.2,77.9,41.9,0,0,0
+ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63.9,71.6,34.7,75.4,72.2,75.1,64.2,68.2,49.3,66,78.6,62.4,41.2,33.9,26.1
 ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,67.5,74.8,39.8,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
 ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,67.3,69.1,60.5,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
+VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.6,39,33.7,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
 VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
+VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,30.3,32,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
+Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,60.5,65.6,46,70.7,69,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34
+Other,[Unified-IO-2 7B](),from scratch,7B,PPL,60.4,65.5,46,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58,42.7,34
+Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,60.2,64.1,45.6,69,66.6,66.5,54.3,62,42.3,50.5,65.3,44.2,57.5,36.2,39.4
+Other,[Unified-IO-2 3B](),from scratch,3B,PPL,58.7,63.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64,41.9,57.5,36,39
+Other,[Unified-IO-2 1B](),from scratch,1B,PPL,49.6,55.1,34,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
file/result_task.csv
ADDED
@@ -0,0 +1,46 @@
+Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
+LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,26.9,26.6,27.8,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
+LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,26.8,26.2,28.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
+LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,25.8,25.3,27.4,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
+ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,43.0,45.7,34.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
+ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.1,49.3,36.4,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
+ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,48.1,52.2,35.7,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
+ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,60.7,66.9,42.2,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
+ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,57.4,63.3,39.7,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36,35.7
+ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,58.3,64.8,38.6,75.2,71.4,72,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
+ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,39.4,42.6,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
+ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,37.8,39.8,31.9,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
+ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,32.3,33.5,28.9,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
+ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,34.2,35.5,30.0,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
+ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,37.6,40.1,29.9,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,32.4,33.5,28.9,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,38.3,39.4,34.8,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
+ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,33.7,36.3,25.6,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
+ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.3,35.2,27.4,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
+ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,35.3,39.1,23.7,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
+ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,46.1,49.4,36.2,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
+ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,55.6,61.9,36.6,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
+ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,54.3,59.6,38.4,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
+ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,62.6,68.8,43.9,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41
+ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,32.3,43.0,0.0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
+ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,40.8,54.4,0.0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
+ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,48.9,65.2,0.0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
+ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,46.9,51.0,34.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20
+ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,55.1,60.4,39.2,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46,38.7,32.9
+ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,58.5,65.4,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,48.0,64.0,0.0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50,0,0,0
+ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,59.4,65.5,40.8,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
+ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,50.2,67.0,0.0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
+ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,50.6,67.4,0.0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0,0,0
+ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,49.1,65.5,0.0,75.4,72.8,69,64.5,60.6,55.1,72.2,77.9,41.9,0,0,0
+ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,59.4,67.9,33.7,75.4,72.2,75.1,64.2,68.2,49.3,66,78.6,62.4,41.2,33.9,26.1
+ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,64.2,72.7,38.6,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
+ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,65.7,67.5,60.3,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
+VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,36.9,38.2,32.9,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
+VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,29.8,31.9,23.3,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
+VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,28.7,29.9,25.1,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
+Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,57.6,61.8,44.9,70.7,69,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34
+Other,[Unified-IO-2 7B](),from scratch,7B,PPL,57.8,62.0,44.9,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58,42.7,34
+Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,54.5,57.9,44.4,69,66.6,66.5,54.3,62,42.3,50.5,65.3,44.2,57.5,36.2,39.4
+Other,[Unified-IO-2 3B](),from scratch,3B,PPL,54.4,57.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64,41.9,57.5,36,39
+Other,[Unified-IO-2 1B](),from scratch,1B,PPL,46.8,51.4,33.0,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
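result_task.csv keeps the same per-task columns as result.csv but stores task-averaged aggregates: for its first data row, for example, the mean of the twelve task columns (26.9) matches the stored "Avg. All". A short pandas sketch of that relationship (column names are taken from the CSV header; whether the Space actually generated the file this way is an assumption):

```python
# Sketch: recompute the task-average aggregates of result_task.csv from its
# per-task columns. Whether the file was generated this way is an assumption.
import pandas as pd

IMG_TASKS = ["Scene Understanding", "Instance Identity", "Instance Attribute",
             "Instance Location", "Instance Counting", "Spatial Relation",
             "Instance Interaction", "Visual Reasoning", "Text Recognition"]
VIDEO_TASKS = ["Action Recognition", "Action Prediction", "Procedure Understanding"]

df = pd.read_csv("./file/result_task.csv")
recomputed = pd.DataFrame({
    "Model": df["Model"],
    "Avg. All (recomputed)": df[IMG_TASKS + VIDEO_TASKS].mean(axis=1).round(1),
    "Avg. Img (recomputed)": df[IMG_TASKS].mean(axis=1).round(1),
    "Avg. Video (recomputed)": df[VIDEO_TASKS].mean(axis=1).round(1),
})
print(recomputed.head())
```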
file/result_v2.csv
CHANGED
@@ -1,28 +1,28 @@
-Model,Language Model,Model Size,Evaluation Method,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+Model,Language Model,Model Size,Evaluation Method,Avg. Single,Avg. Multi,Avg. Video,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.8,22.8,36.0,44.2,37.3,0.0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
+[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.4,25.8,36.5,48.6,36.1,0.0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
+[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,47.5,41.1,33.0,44.2,28.4,0.0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
+[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,42.4,32.5,32.6,40.2,34.3,0.0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
+[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,45.0,25.7,34.3,42.5,39.0,0.0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
+[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,36.6,29.3,33.2,35.8,21.9,0.0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
+[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,36.7,44.1,32.6,35.9,36.7,0.0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
+[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,36.0,32.0,32.3,35.2,39.0,0.0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
+[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,36.6,43.5,32.4,35.8,36.7,0.0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
+[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,36.0,34.7,31.4,35.1,0.0,0.0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
+[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,34.9,45.8,31.0,34.2,40.2,0.0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
+[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,38.6,38.7,31.1,36.9,29.0,0.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
+[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,52.4,29.4,40.7,49.6,23.7,0.0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
+[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,50.3,37.4,34.3,46.6,38.5,0.0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
+[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,71.9,70.3,46.7,66.1,29.6,0.0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48,28.3,32.7,0,0,0
+[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,58.3,39.2,36.9,53.3,34.4,0.0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
+[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,38.8,54.5,32.9,37.5,42.6,0.0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
+[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,65.4,49.8,44.9,60.6,30.2,0.0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
+[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,68.5,37.7,40.0,61.7,32.5,0.0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0,0,0
+[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,72.1,44.8,44.2,65.4,35.5,0.0,77.5,78.5,76.6,69,71,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0,0,0
+[Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,46.4,31.2,37.4,44.2,45.6,45.7,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
+[Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,31.0,27.8,30.7,31.0,40.3,42.8,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
+[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,49.9,32.4,39.0,47.3,48.0,50.6,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
+[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,69.8,73.1,61.7,68.1,37.9,0.0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0,0,0
+[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,36.7,35.4,34.2,36.2,37.3,0.0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
+[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,38.3,49.8,31.6,36.9,33.7,0.0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
+[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,35.3,40.7,28.5,33.9,33.7,0.0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
|
file/result_v2_task.csv
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,Language Model,Model Size,Evaluation Method,Avg. Single,Avg. Multi,Avg. Video,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
|
2 |
+
[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,44.1,28.2,34.8,41,35.3,0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
|
3 |
+
[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,45.5,29,35.7,42.2,35.7,0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
|
4 |
+
[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,43.4,46,31.5,41.4,29.7,0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
|
5 |
+
[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,40.6,38.5,31,38.7,30.2,0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
|
6 |
+
[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,41.7,32.9,33.4,39.4,34.1,0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
|
7 |
+
[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,37.4,34.3,32.6,36.2,23.9,0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
|
8 |
+
[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,37.5,48.3,31.5,37.4,34.9,0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
|
9 |
+
[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,37.6,37.1,31.3,36.4,36.6,0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
|
10 |
+
[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,37.5,47.4,31.3,37.3,35.5,0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
|
11 |
+
[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,39,40.7,29.8,37.5,0,0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
|
12 |
+
[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,50.4,30.1,34.4,38.6,0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
|
13 |
+
[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,41.3,44.2,29.4,39.4,28.9,0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
|
14 |
+
[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,49.5,34,39.7,46.3,23.3,0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
|
15 |
+
[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,46,40.8,32.8,43.1,35.5,0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
|
16 |
+
[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,69,74.3,45.9,65.3,30.5,0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48,28.3,32.7,0,0,0
|
17 |
+
[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,50.8,43,35.8,47.3,30.8,0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
|
18 |
+
[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,37.9,52.5,31.4,38,40.3,0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
|
19 |
+
[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,64.2,52.2,42.8,59.2,32.1,0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
|
20 |
+
[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63,39.7,38.4,56.4,31.9,0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0,0,0
|
21 |
+
[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,66.7,43.9,42.6,60.2,37,0,77.5,78.5,76.6,69,71,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0,0,0
|
22 |
+
[Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,45.3,33.2,36.1,42.5,41.1,41.4,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
|
23 |
+
[Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,30.8,31.6,29.9,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
|
24 |
+
[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,46.5,35.5,37.6,43.9,43.4,52.3,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
|
25 |
+
[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,70,78.6,61.3,69.2,44.2,0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0,0,0
|
26 |
+
[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.4,41,33.4,37,35.3,0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
|
27 |
+
[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,35.8,53.8,30.2,36.4,31,0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
|
28 |
+
[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,34.9,44.7,28,34.5,32.2,0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
|
src/__pycache__/utils_display.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc
CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ