Upload 15 files
- __pycache__/constants.cpython-38.pyc +0 -0
- app.py +62 -37
- constants.py +10 -5
- file/result.csv +35 -35
- file/result_task.csv +46 -0
- file/result_v2.csv +28 -28
- file/result_v2_task.csv +28 -0
- src/__pycache__/utils_display.cpython-38.pyc +0 -0
- src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc +0 -0
__pycache__/constants.cpython-38.pyc
CHANGED
Binary files a/__pycache__/constants.cpython-38.pyc and b/__pycache__/constants.cpython-38.pyc differ
app.py
CHANGED
@@ -289,17 +289,22 @@ def add_new_eval(
     csv_data = csv_data.to_csv(CSV_V2_DIR, index=False)
     return 0
 
-def get_baseline_df():
+def get_baseline_df(average_type):
+    if average_type == 'All Average':
+        df = pd.read_csv(CSV_DIR)
+    else:
+        df = pd.read_csv(CSV_TASK_DIR)
     df = df.sort_values(by="Avg. All", ascending=False)
     present_columns = MODEL_INFO + checkbox_group.value
     df = df[present_columns]
     return df
 
-def get_baseline_v2_df():
+def get_baseline_v2_df(average_type):
     # pdb.set_trace()
+    if average_type == 'All Average':
+        df = pd.read_csv(CSV_V2_DIR)
+    else:
+        df = pd.read_csv(CSV_V2_TASK_DIR)
     df = df.sort_values(by="Avg. P1", ascending=False)
     present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
     # pdb.set_trace()
@@ -316,6 +321,10 @@ def get_all_v2_df():
     df = df.sort_values(by="Avg. P1", ascending=False)
     return df
 
+
+def switch_version(version):
+    return f"当前版本: {version}"
+
 block = gr.Blocks()
 
 
@@ -345,25 +354,28 @@ with block:
         interactive=True,
     )
 
+    with gr.Row():
+        # selection for model size part:
+        model_size_v2 = gr.CheckboxGroup(
+            choices=MODEL_SIZE,
+            value=MODEL_SIZE,
+            label="Model Size",
+            interactive=True,
+        )
+
+        # selection for model size part:
+        evaluation_method_v2 = gr.CheckboxGroup(
+            choices=EVALUATION_METHOD,
+            value=EVALUATION_METHOD,
+            label="Evaluation Method",
+            interactive=True,
+        )
+
+    average_type_v2 = gr.Radio(AVERAGE_TYPE, label="Performance Average Type", value="All Average")
 
     # 创建数据帧组件
     data_component_v2 = gr.components.Dataframe(
-        value=get_baseline_v2_df,
+        value=get_baseline_v2_df(average_type_v2.value),
         headers=COLUMN_V2_NAMES,
         type="pandas",
         datatype=DATA_TITILE_V2_TYPE,
@@ -415,7 +427,11 @@ with block:
         # pdb.set_trace()
 
         return filter_component.value
+
+    def on_average_type_v2_change(average_type_v2):
+        return get_baseline_v2_df(average_type_v2)
 
+    average_type_v2.change(fn=on_average_type_v2_change, inputs=[average_type_v2], outputs=data_component_v2)
     model_size_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
     evaluation_method_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
     checkbox_group_v2.change(fn=on_filter_model_size_method_v2_change, inputs=[model_size_v2, evaluation_method_v2, checkbox_group_v2], outputs=data_component_v2)
@@ -442,31 +458,36 @@ with block:
         interactive=True,
     )
 
-    # selection for model size part:
-    evaluation_method = gr.CheckboxGroup(
-        choices=EVALUATION_METHOD,
-        value=EVALUATION_METHOD,
-        label="Evaluation Method",
-        interactive=True,
-    )
+    with gr.Row():
+        # selection for model size part:
+        model_size = gr.CheckboxGroup(
+            choices=MODEL_SIZE,
+            value=MODEL_SIZE,
+            label="Model Size",
+            interactive=True,
+        )
+
+        # selection for model size part:
+        evaluation_method = gr.CheckboxGroup(
+            choices=EVALUATION_METHOD,
+            value=EVALUATION_METHOD,
+            label="Evaluation Method",
+            interactive=True,
+        )
+
+    average_type = gr.Radio(AVERAGE_TYPE, label="Performance Average Type", value="All Average")
 
     # 创建数据帧组件
     data_component = gr.components.Dataframe(
-        value=get_baseline_df,
+        value=get_baseline_df(average_type.value),
        headers=COLUMN_NAMES,
        type="pandas",
        datatype=DATA_TITILE_TYPE,
        interactive=False,
        visible=True,
    )
+
 
     def on_filter_model_size_method_change(selected_model_size, selected_evaluation_method, selected_columns):
 
@@ -512,7 +533,11 @@ with block:
         # pdb.set_trace()
 
         return filter_component.value
+
+    def on_average_type_change(average_type):
+        return get_baseline_df(average_type)
 
+    average_type.change(fn=on_average_type_change, inputs=[average_type], outputs=data_component)
     model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
     evaluation_method.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
     checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, evaluation_method, checkbox_group], outputs=data_component)
@@ -625,8 +650,8 @@ with block:
 
 
     def refresh_data():
-        value1 = get_baseline_df()
-        value2 = get_baseline_v2_df()
+        value1 = get_baseline_df(average_type)
+        value2 = get_baseline_v2_df(average_type_v2)
 
         return value1, value2
 
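The app.py changes above wire a `gr.Radio` ("Performance Average Type") to the leaderboard tables: the radio's value decides which CSV backs `get_baseline_df` / `get_baseline_v2_df`, and a `.change` handler re-renders the `gr.Dataframe`. Below is a minimal, self-contained sketch of that pattern, not the Space's actual code; the two inlined frames stand in for ./file/result.csv and ./file/result_task.csv.

```python
# Minimal sketch of the radio-driven table refresh added in this commit.
# The inlined frames stand in for ./file/result.csv and ./file/result_task.csv.
import gradio as gr
import pandas as pd

ALL_AVERAGE = pd.DataFrame({"Model": ["A", "B"], "Avg. All": [61.6, 58.2]})
TASK_AVERAGE = pd.DataFrame({"Model": ["A", "B"], "Avg. All": [60.7, 55.6]})

def get_baseline_df(average_type):
    # Same idea as app.py: pick the backing table by average type, sort by the average column.
    df = ALL_AVERAGE if average_type == "All Average" else TASK_AVERAGE
    return df.sort_values(by="Avg. All", ascending=False)

with gr.Blocks() as demo:
    average_type = gr.Radio(["All Average", "Task Average"],
                            label="Performance Average Type", value="All Average")
    table = gr.Dataframe(value=get_baseline_df("All Average"), interactive=False)
    # Re-render the table whenever the radio selection changes.
    average_type.change(fn=get_baseline_df, inputs=[average_type], outputs=table)

if __name__ == "__main__":
    demo.launch()
```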
constants.py
CHANGED
@@ -6,15 +6,17 @@ EVALUATION_METHOD = ["PPL", "PPL for A/B/C/D", "Generate", "NG"]
 DIMENSION_LEVEL = ["L1", "L2", "L3"]
 LEADERBOARD_VERSION = ["Version1", "Version2"]
 TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
-TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
+TASK_V2_INFO = ["Avg. Single", "Avg. Multi", "Avg. Video", "Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
 
 AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
-AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]
+AVG_V2_INFO = ["Avg. Single", "Avg. Multi", "Avg. Video", "Avg. P1", "Avg. P2", "Avg. P3"]
 
 DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
-DATA_TITILE_V2_TYPE = ["markdown", "markdown","markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
+DATA_TITILE_V2_TYPE = ["markdown", "markdown","markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
 CSV_DIR = "./file/result.csv"
+CSV_TASK_DIR = './file/result_task.csv'
 CSV_V2_DIR = "./file/result_v2.csv"
+CSV_V2_TASK_DIR = './file/result_v2_task.csv'
 
 COLUMN_NAMES = MODEL_INFO + TASK_INFO
 COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
@@ -22,6 +24,9 @@ COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
 DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1509, 1225, 1023]
 DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]
 
+LEADERBORAD_VERSION = ["SEED-Bench-1", "SEED-Bench-2"]
+AVERAGE_TYPE = ["All Average", "Task Average"]
+
 LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard
 
 Welcome to the leaderboard of the SEED-Bench! 🏆
@@ -81,9 +86,9 @@ SUBMIT_INTRODUCTION = """# Submit on SEED Benchmark Introduction
 TABLE_INTRODUCTION = """In the table below, we summarize each task performance of all the models.
 We use accuracy (%) as the primary evaluation metric for each task.
 
+When the Performance Average Type is "All Average", the overall accuracy is computed by dividing the total number of correct QA answers by the total number of QA questions.
 
+When the Performance Average Type is "Task Average", the overall accuracy is the average of the per-dimension accuracies.
 
 For the PPL evaluation method, we compute the loss for each candidate and select the candidate with the lowest loss. For details, please refer to [InternLM_Xcomposer_VL_interface](https://github.com/AILab-CVC/SEED-Bench/blob/387a067b6ba99ae5e8231f39ae2d2e453765765c/SEED-Bench-2/model/InternLM_Xcomposer_VL_interface.py#L74).
 
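The two sentences added to TABLE_INTRODUCTION distinguish a micro-average ("All Average") from a macro-average over evaluation dimensions ("Task Average"). A small sketch of the difference, using made-up per-task counts (the real per-dimension question totals are the DATA_NUM / DATA_NUM_V2 constants):

```python
# Illustrative only: two tasks with made-up correct/total counts.
correct = {"Scene Understanding": 720, "Action Recognition": 430}
total = {"Scene Understanding": 978, "Action Recognition": 1509}

# "All Average": micro-average over every QA item.
all_average = 100 * sum(correct.values()) / sum(total.values())

# "Task Average": macro-average of the per-dimension accuracies.
per_task = [100 * correct[t] / total[t] for t in correct]
task_average = sum(per_task) / len(per_task)

print(round(all_average, 1), round(task_average, 1))  # 46.2 51.1
```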
file/result.csv
CHANGED
@@ -1,46 +1,46 @@
 Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
+LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,27.7,27.3,28.6,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
 LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
+LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
+ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
 ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
 ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
+ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,61.6,68.2,42.7,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
+ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,62.4,68.2,40.5,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36,35.7
+ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,62.8,68.9,39.5,75.2,71.4,72,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
 ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
+ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
+ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
+ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
 ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,39.7,42.9,30.6,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,40.9,42.7,35.7,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
+ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,32.7,35.2,25.8,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
+ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
+ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,34,37.9,23,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
+ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,50,54.4,37.5,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
 ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,58.2,65.4,37.8,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
+ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,56.3,62.3,39.1,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
+ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,66.8,72.7,44.7,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41
+ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
+ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
+ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
+ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20
+ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46,38.7,32.9
 ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,0,67,0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50,0,0,0
+ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,62.3,69.6,41.5,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
+ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,0,69.7,0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
+ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,0,70.8,0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0,0,0
+ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,0,68.6,0,75.4,72.8,69,64.5,60.6,55.1,72.2,77.9,41.9,0,0,0
+ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63.9,71.6,34.7,75.4,72.2,75.1,64.2,68.2,49.3,66,78.6,62.4,41.2,33.9,26.1
 ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,67.5,74.8,39.8,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
 ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,67.3,69.1,60.5,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
+VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.6,39,33.7,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
 VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
+VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,30.3,32,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
+Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,60.5,65.6,46,70.7,69,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34
+Other,[Unified-IO-2 7B](),from scratch,7B,PPL,60.4,65.5,46,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58,42.7,34
+Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,60.2,64.1,45.6,69,66.6,66.5,54.3,62,42.3,50.5,65.3,44.2,57.5,36.2,39.4
+Other,[Unified-IO-2 3B](),from scratch,3B,PPL,58.7,63.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64,41.9,57.5,36,39
+Other,[Unified-IO-2 1B](),from scratch,1B,PPL,49.6,55.1,34,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
file/result_task.csv
ADDED
@@ -0,0 +1,46 @@
+Model Type,Model,Language Model,Model Size,Evaluation Method,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
+LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,3B,PPL,26.9,26.6,27.8,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
+LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,7B,PPL,26.8,26.2,28.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
+LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,7B,PPL,25.8,25.3,27.4,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
+ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,43.0,45.7,34.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
+ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.1,49.3,36.4,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
+ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,48.1,52.2,35.7,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
+ImageLLM,[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),Vicuna-13B,13B,Generate,60.7,66.9,42.2,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
+ImageLLM,[LLaVA-v1.5-13B-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL,57.4,63.3,39.7,74.9,70.9,70.1,62.5,60.6,52.4,74.2,77.3,26.7,47.5,36,35.7
+ImageLLM,[LLaVA-v1.5-LoRA](https://llava-vl.github.io),Vicuna-13B-v1.5,13B,PPL for A/B/C/D,58.3,64.8,38.6,75.2,71.4,72,62.7,59.8,51.1,71.1,80.7,39.5,46.1,37.1,32.6
+ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,39.4,42.6,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
+ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,37.8,39.8,31.9,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
+ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,32.3,33.5,28.9,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
+ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,34.2,35.5,30.0,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
+ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,7B,PPL,37.6,40.1,29.9,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,32.4,33.5,28.9,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,7B,PPL,38.3,39.4,34.8,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
+ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,33.7,36.3,25.6,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
+ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.3,35.2,27.4,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
+ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,35.3,39.1,23.7,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
+ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,1.3B,PPL,46.1,49.4,36.2,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
+ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL for A/B/C/D,55.6,61.9,36.6,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
+ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,7B,PPL for A/B/C/D,54.3,59.6,38.4,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
+ImageLLM,[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,62.6,68.8,43.9,76.5,77.6,75.3,64.9,66.3,56.8,69.1,78.2,54.7,51.6,39.2,41
+ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,NG,32.3,43.0,0.0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
+ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,65B,NG,40.8,54.4,0.0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
+ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,48.9,65.2,0.0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
+ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,46.9,51.0,34.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20
+ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,NG,55.1,60.4,39.2,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46,38.7,32.9
+ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,7B,Generate,58.5,65.4,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,7B,NG,48.0,64.0,0.0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50,0,0,0
+ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,13B,Generate,59.4,65.5,40.8,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
+ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,7B,Generate,50.2,67.0,0.0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
+ImageLLM,[ShareGPT4V-13B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-13B,13B,Generate,50.6,67.4,0.0,75.9,74.1,73.5,66.8,62.4,54.8,75.3,77.3,46.5,0,0,0
+ImageLLM,[Honeybee-13B](https://github.com/kakaobrain/honeybee),Vicuna-13B,13B,Generate,49.1,65.5,0.0,75.4,72.8,69,64.5,60.6,55.1,72.2,77.9,41.9,0,0,0
+ImageLLM,[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,59.4,67.9,33.7,75.4,72.2,75.1,64.2,68.2,49.3,66,78.6,62.4,41.2,33.9,26.1
+ImageLLM,[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,64.2,72.7,38.6,77.7,77.4,76.8,69.4,71.2,59.4,70.1,78.3,74.1,48.1,37.9,29.9
+ImageLLM,[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,65.7,67.5,60.3,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,57.6,65.7,51.7,63.4
+VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,36.9,38.2,32.9,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
+VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,29.8,31.9,23.3,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
+VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,28.7,29.9,25.1,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
+Other,[Unified-IO-2 7B (2.5M)](),from scratch,7B,PPL,57.6,61.8,44.9,70.7,69,67.4,55.4,62.6,45.5,60.8,67.1,58.1,57.5,43.2,34
+Other,[Unified-IO-2 7B](),from scratch,7B,PPL,57.8,62.0,44.9,71.3,68.8,67.5,55.5,61.2,45.4,62.9,66.5,59.3,58,42.7,34
+Other,[Unified-IO-2 3B (3M)](),from scratch,3B,PPL,54.5,57.9,44.4,69,66.6,66.5,54.3,62,42.3,50.5,65.3,44.2,57.5,36.2,39.4
+Other,[Unified-IO-2 3B](),from scratch,3B,PPL,54.4,57.8,44.2,68.8,65.8,67.2,52.9,60.4,43.1,55.7,64,41.9,57.5,36,39
+Other,[Unified-IO-2 1B](),from scratch,1B,PPL,46.8,51.4,33.0,63.8,57.7,54.6,41.9,53.7,33.3,51.5,58.3,47.7,39.8,34.5,24.6
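result_task.csv keeps the same per-task columns as result.csv but stores task-averaged aggregates: for its first data row, for example, the mean of the twelve task columns (26.9) matches the stored "Avg. All". A short pandas sketch of that relationship (column names are taken from the CSV header; whether the Space actually generated the file this way is an assumption):

```python
# Sketch: recompute the task-average aggregates of result_task.csv from its
# per-task columns. Whether the file was generated this way is an assumption.
import pandas as pd

IMG_TASKS = ["Scene Understanding", "Instance Identity", "Instance Attribute",
             "Instance Location", "Instance Counting", "Spatial Relation",
             "Instance Interaction", "Visual Reasoning", "Text Recognition"]
VIDEO_TASKS = ["Action Recognition", "Action Prediction", "Procedure Understanding"]

df = pd.read_csv("./file/result_task.csv")
recomputed = pd.DataFrame({
    "Model": df["Model"],
    "Avg. All (recomputed)": df[IMG_TASKS + VIDEO_TASKS].mean(axis=1).round(1),
    "Avg. Img (recomputed)": df[IMG_TASKS].mean(axis=1).round(1),
    "Avg. Video (recomputed)": df[VIDEO_TASKS].mean(axis=1).round(1),
})
print(recomputed.head())
```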
file/result_v2.csv
CHANGED
@@ -1,28 +1,28 @@
-Model,Language Model,Model Size,Evaluation Method,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+Model,Language Model,Model Size,Evaluation Method,Avg. Single,Avg. Multi,Avg. Video,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,46.8,22.8,36.0,44.2,37.3,0.0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
+[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,52.4,25.8,36.5,48.6,36.1,0.0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
+[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,47.5,41.1,33.0,44.2,28.4,0.0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
+[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,42.4,32.5,32.6,40.2,34.3,0.0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
+[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,45.0,25.7,34.3,42.5,39.0,0.0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
+[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,36.6,29.3,33.2,35.8,21.9,0.0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
+[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,36.7,44.1,32.6,35.9,36.7,0.0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
+[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,36.0,32.0,32.3,35.2,39.0,0.0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
+[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,36.6,43.5,32.4,35.8,36.7,0.0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
+[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,36.0,34.7,31.4,35.1,0.0,0.0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
+[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,34.9,45.8,31.0,34.2,40.2,0.0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
+[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,38.6,38.7,31.1,36.9,29.0,0.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
+[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,52.4,29.4,40.7,49.6,23.7,0.0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
+[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,50.3,37.4,34.3,46.6,38.5,0.0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
+[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,71.9,70.3,46.7,66.1,29.6,0.0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48,28.3,32.7,0,0,0
+[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,58.3,39.2,36.9,53.3,34.4,0.0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
+[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,38.8,54.5,32.9,37.5,42.6,0.0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
+[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,65.4,49.8,44.9,60.6,30.2,0.0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
+[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,68.5,37.7,40.0,61.7,32.5,0.0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0,0,0
+[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,72.1,44.8,44.2,65.4,35.5,0.0,77.5,78.5,76.6,69,71,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0,0,0
+[Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,46.4,31.2,37.4,44.2,45.6,45.7,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
+[Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,31.0,27.8,30.7,31.0,40.3,42.8,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
+[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,49.9,32.4,39.0,47.3,48.0,50.6,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
+[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,69.8,73.1,61.7,68.1,37.9,0.0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0,0,0
+[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,36.7,35.4,34.2,36.2,37.3,0.0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
+[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,38.3,49.8,31.6,36.9,33.7,0.0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
+[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,35.3,40.7,28.5,33.9,33.7,0.0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
|
file/result_v2_task.csv
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,Language Model,Model Size,Evaluation Method,Avg. Single,Avg. Multi,Avg. Video,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
|
2 |
+
[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,44.1,28.2,34.8,41,35.3,0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
|
3 |
+
[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,3B,PPL,45.5,29,35.7,42.2,35.7,0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
|
4 |
+
[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,7B,PPL,43.4,46,31.5,41.4,29.7,0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
|
5 |
+
[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,7B,PPL,40.6,38.5,31,38.7,30.2,0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
|
6 |
+
[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,7B,PPL,41.7,32.9,33.4,39.4,34.1,0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
|
7 |
+
[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,7B,PPL,37.4,34.3,32.6,36.2,23.9,0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
|
8 |
+
[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,7B,PPL,37.5,48.3,31.5,37.4,34.9,0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
|
9 |
+
[Otter](https://github.com/Luodian/Otter),LLaMA-7B,7B,PPL,37.6,37.1,31.3,36.4,36.6,0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
|
10 |
+
[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,7B,PPL,37.5,47.4,31.3,37.3,35.5,0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
|
11 |
+
[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,7B,PPL,39,40.7,29.8,37.5,0,0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
|
12 |
+
[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,7B,PPL,33.5,50.4,30.1,34.4,38.6,0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
|
13 |
+
[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,7B,PPL,41.3,44.2,29.4,39.4,28.9,0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
|
14 |
+
[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,1.3B,PPL,49.5,34,39.7,46.3,23.3,0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
|
15 |
+
[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,7B,PPL,46,40.8,32.8,43.1,35.5,0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
|
16 |
+
[Qwen-VL-plus](https://github.com/QwenLM/Qwen-VL/tree/master?tab=readme-ov-file#qwen-vl-plus),Qwen-LM,-,PPL for A/B/C/D,69,74.3,45.9,65.3,30.5,0,76.6,77.7,76.3,65.1,65.8,55.9,73.2,77.9,61.8,97,97.2,39.5,73.4,75.8,51.7,38.6,66.7,81.8,51.8,54.5,29.3,48,28.3,32.7,0,0,0
|
17 |
+
[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,7B,PPL,50.8,43,35.8,47.3,30.8,0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
|
18 |
+
[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,7B,PPL,37.9,52.5,31.4,38,40.3,0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
|
19 |
+
[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,7B,PPL,64.2,52.2,42.8,59.2,32.1,0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
|
20 |
+
[SPHINXv1-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,63,39.7,38.4,56.4,31.9,0,75.5,72.4,75.1,63.1,67.6,50.4,64.9,76.7,60,81.5,82.4,21.8,60.3,58.5,65.5,32.6,35.9,43.4,52.4,41.2,33.9,26.1,33.3,30.6,0,0,0
|
21 |
+
[SPHINXv2-1k](https://github.com/Alpha-VLLM/LLaMA2-Accessory/tree/main/SPHINX),LLaMA-2-13B,13B,Generate,66.7,43.9,42.6,60.2,37,0,77.5,78.5,76.6,69,71,57.5,73.2,77.6,62.1,82.7,85.2,44.5,62.3,60.3,65.3,23.5,45.7,42.1,54.6,48.1,37.9,29.9,33.3,40.8,0,0,0
|
22 |
+
[Emu](https://github.com/baaivision/Emu),LLaMA-13B,13B,PPL,45.3,33.2,36.1,42.5,41.1,41.4,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
|
23 |
+
[Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,7B,PPL,30.8,31.6,29.9,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
|
24 |
+
[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,13B,PPL,46.5,35.5,37.6,43.9,43.4,52.3,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
|
25 |
+
[GPT-4V](https://openai.com/research/gpt-4v-system-card),\-,-,Generate,70,78.6,61.3,69.2,44.2,0,77.5,73.9,70.6,61.8,56.8,56.9,74.2,78.5,82.3,91.8,97.4,45.1,71.9,66.1,71.1,43.9,67.9,89.3,64.5,65.7,51.7,63.4,29.2,59.2,0,0,0
|
26 |
+
[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,7B,PPL,37.4,41,33.4,37,35.3,0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
|
27 |
+
[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,7B,PPL,35.8,53.8,30.2,36.4,31,0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
|
28 |
+
[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,13B,PPL,34.9,44.7,28,34.5,32.2,0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
|
src/__pycache__/utils_display.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc
CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ