Upload 12 files
- __pycache__/constants.cpython-38.pyc +0 -0
- app.py +103 -4
- constants.py +13 -26
- file/result.csv +2 -2
- file/result_v2.csv +24 -0
- src/__pycache__/utils_display.cpython-38.pyc +0 -0
- src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc +0 -0
__pycache__/constants.cpython-38.pyc
CHANGED
Binary files a/__pycache__/constants.cpython-38.pyc and b/__pycache__/constants.cpython-38.pyc differ
app.py
CHANGED
@@ -154,11 +154,25 @@ def get_baseline_df():
     df = df[present_columns]
     return df
 
+def get_baseline_v2_df():
+    # pdb.set_trace()
+    df = pd.read_csv(CSV_V2_DIR)
+    df = df.sort_values(by="Avg. P1", ascending=False)
+    present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
+    # pdb.set_trace()
+    df = df[present_columns]
+    return df
+
 def get_all_df():
     df = pd.read_csv(CSV_DIR)
     df = df.sort_values(by="Avg. All", ascending=False)
     return df
 
+def get_all_v2_df():
+    df = pd.read_csv(CSV_V2_DIR)
+    df = df.sort_values(by="Avg. P1", ascending=False)
+    return df
+
 block = gr.Blocks()
 
 
@@ -167,7 +181,82 @@ with block:
         LEADERBORAD_INTRODUCTION
     )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 SEED Benchmark", elem_id="seed-benchmark-tab-table", id=0):
+        with gr.TabItem("🏅 SEED Benchmark v2", elem_id="seed-benchmark-tab-table", id=0):
+            with gr.Row():
+                with gr.Accordion("Citation", open=False):
+                    citation_button = gr.Textbox(
+                        value=CITATION_BUTTON_TEXT,
+                        label=CITATION_BUTTON_LABEL,
+                        elem_id="citation-button",
+                    ).style(show_copy_button=True)
+
+            gr.Markdown(
+                TABLE_INTRODUCTION
+            )
+
+            # selection for column part:
+            checkbox_group_v2 = gr.CheckboxGroup(
+                choices=TASK_V2_INFO,
+                value=AVG_V2_INFO,
+                label="Evaluation Dimension",
+                interactive=True,
+            )
+
+            '''
+            # selection for model size part:
+            filter_model_size = gr.CheckboxGroup(
+                choices=MODEL_SIZE,
+                value=MODEL_SIZE,
+                label="Model Size",
+                interactive=True,
+            )
+
+            filter_dimension_level = gr.CheckboxGroup(
+                choices=DIMENSION_LEVEL,
+                label="Model level",
+                multiselect=False,
+                value=DIMENSION_LEVEL[1],
+                interactive=True,
+            )
+            '''
+
+            # 创建数据帧组件 (create the dataframe component)
+            data_component_v2 = gr.components.Dataframe(
+                value=get_baseline_v2_df,
+                headers=COLUMN_V2_NAMES,
+                type="pandas",
+                datatype=DATA_TITILE_V2_TYPE,
+                interactive=False,
+                visible=True,
+            )
+
+            def on_checkbox_group_v2_change(selected_columns):
+                # pdb.set_trace()
+                selected_columns = [item for item in TASK_V2_INFO if item in selected_columns]
+                present_columns = MODEL_INFO_V2 + selected_columns
+                updated_data = get_all_v2_df()[present_columns]
+                updated_data = updated_data.sort_values(by=present_columns[2], ascending=False)
+                updated_headers = present_columns
+                # pdb.set_trace()
+                update_datatype = [DATA_TITILE_V2_TYPE[COLUMN_V2_NAMES.index(x)] for x in updated_headers]
+
+                filter_component = gr.components.Dataframe(
+                    value=updated_data,
+                    headers=updated_headers,
+                    type="pandas",
+                    datatype=update_datatype,
+                    interactive=False,
+                    visible=True,
+                )
+                # pdb.set_trace()
+
+                return filter_component.value
+
+            # 将复选框组关联到处理函数 (wire the checkbox group to its handler)
+            checkbox_group_v2.change(fn=on_checkbox_group_v2_change, inputs=checkbox_group_v2, outputs=data_component_v2)
+
+        # table seed-bench-v1
+        with gr.TabItem("🏅 SEED Benchmark v1", elem_id="seed-benchmark-tab-table", id=1):
             with gr.Row():
                 with gr.Accordion("Citation", open=False):
                     citation_button = gr.Textbox(
@@ -182,11 +271,21 @@ with block:
 
             # selection for column part:
            checkbox_group = gr.CheckboxGroup(
-                choices=
+                choices=TASK_INFO,
                 value=AVG_INFO,
-                label="
+                label="Evaluation Dimension",
+                interactive=True,
+            )
+
+            '''
+            # selection for model size part:
+            filter_model_size = gr.CheckboxGroup(
+                choices=MODEL_SIZE,
+                value=MODEL_SIZE,
+                label="Model Size",
                 interactive=True,
             )
+            '''
 
             # 创建数据帧组件 (create the dataframe component)
             data_component = gr.components.Dataframe(
@@ -200,7 +299,7 @@
 
             def on_checkbox_group_change(selected_columns):
                 # pdb.set_trace()
-                selected_columns = [item for item in
+                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                 present_columns = MODEL_INFO + selected_columns
                 updated_data = get_all_df()[present_columns]
                 updated_data = updated_data.sort_values(by=present_columns[3], ascending=False)
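The v2 tab reuses the column-filtering pattern of the v1 tab: the checkbox callback keeps the fixed model-info columns, intersects the user's selection with the canonical dimension list so column order stays stable, and re-sorts by the first score column. A minimal standalone sketch of that pattern, with toy data and a deliberately shortened TASK_V2_INFO list (illustration only, not the app's full constants):

import pandas as pd

# Shortened stand-ins for the constants defined in constants.py (illustration only).
MODEL_INFO_V2 = ["Model", "Language Model"]
TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding"]

def filter_columns(df, selected_columns):
    # Intersect with the canonical list so column order does not depend on click order.
    selected = [c for c in TASK_V2_INFO if c in selected_columns]
    present = MODEL_INFO_V2 + selected
    # present[2] is the first score column (the two model-info columns come first).
    return df[present].sort_values(by=present[2], ascending=False)

# Toy frame standing in for result_v2.csv.
toy = pd.DataFrame({
    "Model": ["A", "B"],
    "Language Model": ["LM-A", "LM-B"],
    "Avg. P1": [43.9, 59.2],
    "Avg. P2": [43.4, 32.1],
    "Avg. P3": [52.3, 0.0],
    "Scene Understanding": [64.0, 74.8],
})
print(filter_columns(toy, ["Avg. P1", "Scene Understanding"]))

Note the sort index: the v2 callback sorts by present_columns[2] because MODEL_INFO_V2 has only two columns, while the v1 callback sorts by present_columns[3] since its model info keeps the extra "Model Type" column.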
constants.py
CHANGED
@@ -1,36 +1,25 @@
 # this is .py for store constants
 MODEL_INFO = ["Model Type", "Model", "Language Model"]
-
-
+MODEL_INFO_V2 = ["Model", "Language Model"]
+MODEL_SIZE = ["<10B", ">=10B"]
+DIMENSION_LEVEL = ["L1", "L2", "L3"]
+LEADERBOARD_VERSION = ["Version1", "Version2"]
+TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
+TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
 
 AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
+AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]
+
 DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
+DATA_TITILE_V2_TYPE = ["markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
 CSV_DIR = "./file/result.csv"
+CSV_V2_DIR = "./file/result_v2.csv"
 
-
-
+COLUMN_NAMES = MODEL_INFO + TASK_INFO
+COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
 
 DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
-
-UNTUNED_MODEL_RESULTS = '''LLM & Flan-T5 & Flan-T5-XL &23.0 &29.0 &32.8 &31.8 &20.5 &31.8 &33.0 &18.2 &19.4 &23.2 &34.9 &25.4 \\
-LLM & Vicuna & Vicuna-7B &23.4 &30.7 &29.7 &30.9 &30.8 &28.6 &29.8 &18.5 &13.4 &27.3 &34.5 &23.8 \\
-LLM & LLaMA & LLaMA-7B &26.3 &27.4 &26.2 &28.3 &25.1 &28.8 &19.2 &37.0 & 9.0 &33.0 &23.1 &26.2 \\
-ImageLLM & BLIP-2 & Flan-T5-XL &59.1 &53.9 &49.2 &42.3 &43.2 &36.7 &55.7 &45.6 &25.9 &32.6 &47.5 &24.0 \\
-ImageLLM & InstructBLIP & Flan-T5-XL &60.3 &58.5 &63.4 &40.6 &58.4 &38.7 &51.6 &45.9 &25.9 &33.1 &49.1 &27.1 \\
-ImageLLM & InstructBLIP-Vicuna & Vicuna-7B &60.2 &58.9 &65.6 &43.6 &57.2 &40.3 &52.6 &47.7 &43.5 &34.5 &49.6 &23.1 \\
-ImageLLM & LLaVA & LLaMA-7B &42.7 &34.9 &33.5 &28.4 &41.9 &30.8 &27.8 &46.8 &27.7 &29.7 &21.4 &19.1 \\
-ImageLLM & MiniGPT-4 & Flan-T5-XL &56.3 &49.2 &45.8 &37.9 &45.3 &32.6 &47.4 &57.1 &11.8 &38.2 &24.5 &27.1 \\
-ImageLLM & VPGTrans & LLaMA-7B &51.9 &44.1 &39.9 &36.1 &33.7 &36.4 &32.0 &53.2 &30.6 &39.5 &24.3 &31.9 \\
-ImageLLM & MultiModal-GPT & LLaMA-7B &43.6 &37.9 &31.5 &30.8 &27.3 &30.1 &29.9 &51.4 &18.8 &36.9 &25.8 &24.0 \\
-ImageLLM & Otter & LLaMA-7B &44.9 &38.6 &32.2 &30.9 &26.3 &31.8 &32.0 &51.4 &31.8 &37.9 &27.2 &24.8 \\
-ImageLLM & OpenFlamingo & LLaMA-7B &43.9 &38.1 &31.3 &30.1 &27.3 &30.6 &29.9 &50.2 &20.0 &37.2 &25.4 &24.2 \\
-ImageLLM & LLaMA-Adapter V2 & LLaMA-7B &45.2 &38.5 &29.3 &33.0 &29.7 &35.5 &39.2 &52.0 &24.7 &38.6 &18.5 &19.6 \\
-ImageLLM & GVT & Vicuna-7B &41.7 &35.5 &31.8 &29.5 &36.2 &32.0 &32.0 &51.1 &27.1 &33.9 &25.4 &23.0 \\
-ImageLLM & mPLUG-Owl & LLaMA-7B &49.7 &45.3 &32.5 &36.7 &27.3 &32.7 &44.3 &54.7 &28.8 &26.7 &17.9 &26.5 \\
-VideoLLM & VideoChat & Vicuna-7B &47.1 &43.8 &34.9 &40.0 &32.8 &34.6 &42.3 &50.5 &17.7 &34.9 &36.4 &27.3 \\
-VideoLLM & Video-ChatGPT & LLaMA-7B &37.2 &31.4 &33.2 &28.4 &35.5 &29.5 &23.7 &42.3 &25.9 &27.6 &21.3 &21.1 \\
-VideoLLM & Valley & LLaMA-13B &39.3 &32.9 &31.6 &27.9 &24.2 &30.1 &27.8 &43.8 &11.8 &31.3 &23.2 &20.7 \\'''
-
+DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]
 
 LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard
 
@@ -76,8 +65,6 @@ LEADERBORAD_INFO = """
 By revealing the limitations of existing MLLMs through evaluation results, we aim for SEED-Bench to provide insights for motivating future research.
 """
 
-
-
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""@article{li2023seed,
 title={SEED-Bench: Benchmarking Multimodal LLMs with Generative Comprehension},
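The Dataframe component pairs COLUMN_V2_NAMES with DATA_TITILE_V2_TYPE by index, and get_baseline_v2_df assumes the CSV header matches those names. A quick local sanity check is easy to run; this is not part of the repo, just a suggested sketch (assumes it is executed from the Space root so the relative CSV path resolves):

import pandas as pd

from constants import COLUMN_V2_NAMES, DATA_TITILE_V2_TYPE, CSV_V2_DIR

# One datatype entry per displayed column; a mismatch would shift column types.
assert len(DATA_TITILE_V2_TYPE) == len(COLUMN_V2_NAMES), "datatype list out of sync with column names"

# The CSV header must match COLUMN_V2_NAMES, otherwise df[present_columns] raises KeyError.
header = list(pd.read_csv(CSV_V2_DIR, nrows=0).columns)
assert header == COLUMN_V2_NAMES, "result_v2.csv header drifted from constants.py"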
file/result.csv
CHANGED
@@ -22,9 +22,9 @@ ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,56.3,62.3,39.1,7
 ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
 ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
 ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
-ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-
+ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20.0
 ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46.0,38.7,32.9
-ImageLLM,[LLaMA-VID-7B](),LLaMA-7B,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
 ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,0,67.0,0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50.0,0,0,0
 ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,62.3,69.6,41.5,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
 ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,0,69.7,0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
file/result_v2.csv
ADDED
@@ -0,0 +1,24 @@
+Model,Language Model,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,41,35.3,0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
+[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,42.2,35.7,0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
+[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,41.4,29.7,0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
+[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,38.7,30.2,0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
+[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,39.4,34.1,0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
+[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,36.2,23.9,0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
+[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,37.4,34.9,0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
+[Otter](https://github.com/Luodian/Otter),LLaMA-7B,36.4,36.6,0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
+[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,37.3,35.5,0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
+[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,37.5,0,0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
+[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,34.4,38.6,0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
+[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,39.4,28.9,0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
+[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,46.3,23.3,0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
+[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,43.1,35.5,0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
+[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,47.3,30.8,0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
+[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,38,40.3,0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
+[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,59.2,32.1,0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
+[Emu](https://github.com/baaivision/Emu),LLaMA-13B,42.5,41.1,41.4,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
+[Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
+[seed-llama](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,43.9,43.4,52.3,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
+[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,37,35.3,0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
+[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,36.4,31,0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
+[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,34.5,32.2,0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
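For reference, the default v2 view that get_baseline_v2_df builds from this file can be reproduced directly with pandas; a small sketch (assumes it is run from the Space root so the relative path resolves):

import pandas as pd

# Load the new v2 results and reproduce the leaderboard's default ordering.
df = pd.read_csv("./file/result_v2.csv")
df = df.sort_values(by="Avg. P1", ascending=False)

# Default columns: model info plus the three part averages.
print(df[["Model", "Language Model", "Avg. P1", "Avg. P2", "Avg. P3"]].head())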
src/__pycache__/utils_display.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc
CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ