Upload 12 files
- __pycache__/constants.cpython-38.pyc +0 -0
- app.py +103 -4
- constants.py +13 -26
- file/result.csv +2 -2
- file/result_v2.csv +24 -0
- src/__pycache__/utils_display.cpython-38.pyc +0 -0
- src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc +0 -0
__pycache__/constants.cpython-38.pyc
CHANGED
Binary files a/__pycache__/constants.cpython-38.pyc and b/__pycache__/constants.cpython-38.pyc differ
app.py
CHANGED
@@ -154,11 +154,25 @@ def get_baseline_df():
     df = df[present_columns]
     return df
 
+def get_baseline_v2_df():
+    # pdb.set_trace()
+    df = pd.read_csv(CSV_V2_DIR)
+    df = df.sort_values(by="Avg. P1", ascending=False)
+    present_columns = MODEL_INFO_V2 + checkbox_group_v2.value
+    # pdb.set_trace()
+    df = df[present_columns]
+    return df
+
 def get_all_df():
     df = pd.read_csv(CSV_DIR)
     df = df.sort_values(by="Avg. All", ascending=False)
     return df
 
+def get_all_v2_df():
+    df = pd.read_csv(CSV_V2_DIR)
+    df = df.sort_values(by="Avg. P1", ascending=False)
+    return df
+
 block = gr.Blocks()
 
 
@@ -167,7 +181,82 @@ with block:
         LEADERBORAD_INTRODUCTION
     )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 SEED Benchmark", elem_id="seed-benchmark-tab-table", id=0):
+        with gr.TabItem("🏅 SEED Benchmark v2", elem_id="seed-benchmark-tab-table", id=0):
+            with gr.Row():
+                with gr.Accordion("Citation", open=False):
+                    citation_button = gr.Textbox(
+                        value=CITATION_BUTTON_TEXT,
+                        label=CITATION_BUTTON_LABEL,
+                        elem_id="citation-button",
+                    ).style(show_copy_button=True)
+
+            gr.Markdown(
+                TABLE_INTRODUCTION
+            )
+
+            # selection for column part:
+            checkbox_group_v2 = gr.CheckboxGroup(
+                choices=TASK_V2_INFO,
+                value=AVG_V2_INFO,
+                label="Evaluation Dimension",
+                interactive=True,
+            )
+
+            '''
+            # selection for model size part:
+            filter_model_size = gr.CheckboxGroup(
+                choices=MODEL_SIZE,
+                value=MODEL_SIZE,
+                label="Model Size",
+                interactive=True,
+            )
+
+            filter_dimension_level = gr.CheckboxGroup(
+                choices=DIMENSION_LEVEL,
+                label="Model level",
+                multiselect=False,
+                value=DIMENSION_LEVEL[1],
+                interactive=True,
+            )
+            '''
+
+            # 创建数据帧组件 (create the dataframe component)
+            data_component_v2 = gr.components.Dataframe(
+                value=get_baseline_v2_df,
+                headers=COLUMN_V2_NAMES,
+                type="pandas",
+                datatype=DATA_TITILE_V2_TYPE,
+                interactive=False,
+                visible=True,
+            )
+
+            def on_checkbox_group_v2_change(selected_columns):
+                # pdb.set_trace()
+                selected_columns = [item for item in TASK_V2_INFO if item in selected_columns]
+                present_columns = MODEL_INFO_V2 + selected_columns
+                updated_data = get_all_v2_df()[present_columns]
+                updated_data = updated_data.sort_values(by=present_columns[2], ascending=False)
+                updated_headers = present_columns
+                # pdb.set_trace()
+                update_datatype = [DATA_TITILE_V2_TYPE[COLUMN_V2_NAMES.index(x)] for x in updated_headers]
+
+                filter_component = gr.components.Dataframe(
+                    value=updated_data,
+                    headers=updated_headers,
+                    type="pandas",
+                    datatype=update_datatype,
+                    interactive=False,
+                    visible=True,
+                )
+                # pdb.set_trace()
+
+                return filter_component.value
+
+            # 将复选框组关联到处理函数 (wire the checkbox group to its handler)
+            checkbox_group_v2.change(fn=on_checkbox_group_v2_change, inputs=checkbox_group_v2, outputs=data_component_v2)
+
+        # table seed-bench-v1
+        with gr.TabItem("🏅 SEED Benchmark v1", elem_id="seed-benchmark-tab-table", id=1):
             with gr.Row():
                 with gr.Accordion("Citation", open=False):
                     citation_button = gr.Textbox(
@@ -182,11 +271,21 @@ with block:
 
             # selection for column part:
            checkbox_group = gr.CheckboxGroup(
-                choices=
+                choices=TASK_INFO,
                 value=AVG_INFO,
-                label="
+                label="Evaluation Dimension",
+                interactive=True,
+            )
+
+            '''
+            # selection for model size part:
+            filter_model_size = gr.CheckboxGroup(
+                choices=MODEL_SIZE,
+                value=MODEL_SIZE,
+                label="Model Size",
                 interactive=True,
             )
+            '''
 
             # 创建数据帧组件 (create the dataframe component)
             data_component = gr.components.Dataframe(
@@ -200,7 +299,7 @@
 
             def on_checkbox_group_change(selected_columns):
                 # pdb.set_trace()
-                selected_columns = [item for item in
+                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                 present_columns = MODEL_INFO + selected_columns
                 updated_data = get_all_df()[present_columns]
                 updated_data = updated_data.sort_values(by=present_columns[3], ascending=False)
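The v2 tab reuses the column-filtering pattern of the v1 tab: the checkbox callback keeps the fixed model-info columns, intersects the user's selection with the canonical dimension list so column order stays stable, and re-sorts by the first score column. A minimal standalone sketch of that pattern, with toy data and a deliberately shortened TASK_V2_INFO list (illustration only, not the app's full constants):

import pandas as pd

# Shortened stand-ins for the constants defined in constants.py (illustration only).
MODEL_INFO_V2 = ["Model", "Language Model"]
TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding"]

def filter_columns(df, selected_columns):
    # Intersect with the canonical list so column order does not depend on click order.
    selected = [c for c in TASK_V2_INFO if c in selected_columns]
    present = MODEL_INFO_V2 + selected
    # present[2] is the first score column (the two model-info columns come first).
    return df[present].sort_values(by=present[2], ascending=False)

# Toy frame standing in for result_v2.csv.
toy = pd.DataFrame({
    "Model": ["A", "B"],
    "Language Model": ["LM-A", "LM-B"],
    "Avg. P1": [43.9, 59.2],
    "Avg. P2": [43.4, 32.1],
    "Avg. P3": [52.3, 0.0],
    "Scene Understanding": [64.0, 74.8],
})
print(filter_columns(toy, ["Avg. P1", "Scene Understanding"]))

Note the sort index: the v2 callback sorts by present_columns[2] because MODEL_INFO_V2 has only two columns, while the v1 callback sorts by present_columns[3] since its model info keeps the extra "Model Type" column.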
constants.py
CHANGED
@@ -1,36 +1,25 @@
 # this is .py for store constants
 MODEL_INFO = ["Model Type", "Model", "Language Model"]
-
-
+MODEL_INFO_V2 = ["Model", "Language Model"]
+MODEL_SIZE = ["<10B", ">=10B"]
+DIMENSION_LEVEL = ["L1", "L2", "L3"]
+LEADERBOARD_VERSION = ["Version1", "Version2"]
+TASK_INFO = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
+TASK_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3", "Scene Understanding", "Instance Identity", "Instance Attribute", "Instance Location", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Celebrity Recognition", "Landmark Recognition", "Chart Understanding", "Visual Referring Expression", "Science Knowledge", "Emotion Recognition", "Visual Mathematics", "Difference Spotting", "Meme Comprehension", "Global Video Understanding", "Action Recognition", "Action Predicion", "Procedure Understanding", "In-Context Captioning", "Interleaved Image-Text Analysis", "Text-to-Image Generation", "Next Image Prediction", "Text-Image Creation"]
 
 AVG_INFO = ["Avg. All", "Avg. Img", "Avg. Video"]
+AVG_V2_INFO = ["Avg. P1", "Avg. P2", "Avg. P3"]
+
 DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
+DATA_TITILE_V2_TYPE = ["markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
 CSV_DIR = "./file/result.csv"
+CSV_V2_DIR = "./file/result_v2.csv"
 
-
-
+COLUMN_NAMES = MODEL_INFO + TASK_INFO
+COLUMN_V2_NAMES = MODEL_INFO_V2 + TASK_V2_INFO
 
 DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
-
-UNTUNED_MODEL_RESULTS = '''LLM & Flan-T5 & Flan-T5-XL &23.0 &29.0 &32.8 &31.8 &20.5 &31.8 &33.0 &18.2 &19.4 &23.2 &34.9 &25.4 \\
-LLM & Vicuna & Vicuna-7B &23.4 &30.7 &29.7 &30.9 &30.8 &28.6 &29.8 &18.5 &13.4 &27.3 &34.5 &23.8 \\
-LLM & LLaMA & LLaMA-7B &26.3 &27.4 &26.2 &28.3 &25.1 &28.8 &19.2 &37.0 & 9.0 &33.0 &23.1 &26.2 \\
-ImageLLM & BLIP-2 & Flan-T5-XL &59.1 &53.9 &49.2 &42.3 &43.2 &36.7 &55.7 &45.6 &25.9 &32.6 &47.5 &24.0 \\
-ImageLLM & InstructBLIP & Flan-T5-XL &60.3 &58.5 &63.4 &40.6 &58.4 &38.7 &51.6 &45.9 &25.9 &33.1 &49.1 &27.1 \\
-ImageLLM & InstructBLIP-Vicuna & Vicuna-7B &60.2 &58.9 &65.6 &43.6 &57.2 &40.3 &52.6 &47.7 &43.5 &34.5 &49.6 &23.1 \\
-ImageLLM & LLaVA & LLaMA-7B &42.7 &34.9 &33.5 &28.4 &41.9 &30.8 &27.8 &46.8 &27.7 &29.7 &21.4 &19.1 \\
-ImageLLM & MiniGPT-4 & Flan-T5-XL &56.3 &49.2 &45.8 &37.9 &45.3 &32.6 &47.4 &57.1 &11.8 &38.2 &24.5 &27.1 \\
-ImageLLM & VPGTrans & LLaMA-7B &51.9 &44.1 &39.9 &36.1 &33.7 &36.4 &32.0 &53.2 &30.6 &39.5 &24.3 &31.9 \\
-ImageLLM & MultiModal-GPT & LLaMA-7B &43.6 &37.9 &31.5 &30.8 &27.3 &30.1 &29.9 &51.4 &18.8 &36.9 &25.8 &24.0 \\
-ImageLLM & Otter & LLaMA-7B &44.9 &38.6 &32.2 &30.9 &26.3 &31.8 &32.0 &51.4 &31.8 &37.9 &27.2 &24.8 \\
-ImageLLM & OpenFlamingo & LLaMA-7B &43.9 &38.1 &31.3 &30.1 &27.3 &30.6 &29.9 &50.2 &20.0 &37.2 &25.4 &24.2 \\
-ImageLLM & LLaMA-Adapter V2 & LLaMA-7B &45.2 &38.5 &29.3 &33.0 &29.7 &35.5 &39.2 &52.0 &24.7 &38.6 &18.5 &19.6 \\
-ImageLLM & GVT & Vicuna-7B &41.7 &35.5 &31.8 &29.5 &36.2 &32.0 &32.0 &51.1 &27.1 &33.9 &25.4 &23.0 \\
-ImageLLM & mPLUG-Owl & LLaMA-7B &49.7 &45.3 &32.5 &36.7 &27.3 &32.7 &44.3 &54.7 &28.8 &26.7 &17.9 &26.5 \\
-VideoLLM & VideoChat & Vicuna-7B &47.1 &43.8 &34.9 &40.0 &32.8 &34.6 &42.3 &50.5 &17.7 &34.9 &36.4 &27.3 \\
-VideoLLM & Video-ChatGPT & LLaMA-7B &37.2 &31.4 &33.2 &28.4 &35.5 &29.5 &23.7 &42.3 &25.9 &27.6 &21.3 &21.1 \\
-VideoLLM & Valley & LLaMA-13B &39.3 &32.9 &31.6 &27.9 &24.2 &30.1 &27.8 &43.8 &11.8 &31.3 &23.2 &20.7 \\'''
-
+DATA_NUM_V2 = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 435, 330, 500, 501, 199, 277, 501, 132, 501, 159, 1594, 1509, 1225, 1023, 120, 49, 1008, 81, 79]
 
 LEADERBORAD_INTRODUCTION = """# SEED-Bench Leaderboard
 
@@ -76,8 +65,6 @@ LEADERBORAD_INFO = """
 By revealing the limitations of existing MLLMs through evaluation results, we aim for SEED-Bench to provide insights for motivating future research.
 """
 
-
-
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""@article{li2023seed,
 title={SEED-Bench: Benchmarking Multimodal LLMs with Generative Comprehension},
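The Dataframe component pairs COLUMN_V2_NAMES with DATA_TITILE_V2_TYPE by index, and get_baseline_v2_df assumes the CSV header matches those names. A quick local sanity check is easy to run; this is not part of the repo, just a suggested sketch (assumes it is executed from the Space root so the relative CSV path resolves):

import pandas as pd

from constants import COLUMN_V2_NAMES, DATA_TITILE_V2_TYPE, CSV_V2_DIR

# One datatype entry per displayed column; a mismatch would shift column types.
assert len(DATA_TITILE_V2_TYPE) == len(COLUMN_V2_NAMES), "datatype list out of sync with column names"

# The CSV header must match COLUMN_V2_NAMES, otherwise df[present_columns] raises KeyError.
header = list(pd.read_csv(CSV_V2_DIR, nrows=0).columns)
assert header == COLUMN_V2_NAMES, "result_v2.csv header drifted from constants.py"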
file/result.csv
CHANGED
@@ -22,9 +22,9 @@ ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,56.3,62.3,39.1,7
 ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
 ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
 ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
-ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-
+ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20.0
 ImageLLM,[mPLUG-Owl2](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,57.8,64.1,39.8,72.7,67.6,63.6,53.6,58.5,50.8,70.1,76.4,30.2,46.0,38.7,32.9
-ImageLLM,[LLaMA-VID-7B](),LLaMA-7B,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
+ImageLLM,[LLaMA-VID-7B](https://github.com/dvlab-research/LLaMA-VID),LLaMA-7B,59.9,67.6,37.9,75.4,71.2,68.9,62.9,58.4,50.7,70.1,76.1,54.7,42.8,35.2,35.6
 ImageLLM,[Pink-LLaMA2](https://github.com/SY-Xuan/Pink/stargazers),LLaMA2-7B,0,67.0,0,75.2,70.1,70.1,63.3,53.8,50.2,69.1,74.3,50.0,0,0,0
 ImageLLM,[InfMLLM-13B](https://github.com/mightyzau/InfMLLM),Vicuna-13B,62.3,69.6,41.5,75.5,73,70.4,66.2,63.3,54.2,72.2,77.9,37.2,49.5,39,33.9
 ImageLLM,[ShareGPT4V-7B](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V),Vicuna-7B,0,69.7,0,75.3,71.4,72.3,63.1,62,53.9,70.1,79.8,54.7,0,0,0
file/result_v2.csv
ADDED
@@ -0,0 +1,24 @@
+Model,Language Model,Avg. P1,Avg. P2,Avg. P3,Scene Understanding,Instance Identity,Instance Attribute,Instance Location,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Celebrity Recognition,Landmark Recognition,Chart Understanding,Visual Referring Expression,Science Knowledge,Emotion Recognition,Visual Mathematics,Difference Spotting,Meme Comprehension,Global Video Understanding,Action Recognition,Action Predicion,Procedure Understanding,In-Context Captioning,Interleaved Image-Text Analysis,Text-to-Image Generation,Next Image Prediction,Text-Image Creation
+[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,41,35.3,0,58.5,48.6,49,39.1,43.4,36.2,48.5,52.9,60.7,51.8,51.4,19.2,43.2,52.4,29.3,22,17.8,38.6,42.5,37.7,36.2,22.9,40,30.6,0,0,0
+[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,42.2,35.7,0,58.9,49.7,61.7,35.1,58.1,34.9,47.4,55.9,61.4,48.5,45.4,26.4,41.7,47.7,34.5,21.2,22.8,35.2,41.5,36.1,40.5,24.5,36.7,34.7,0,0,0
+[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,41.4,29.7,0,53.6,43.9,49,37.8,56.5,35.8,43.3,56.2,57.2,60.3,44.4,27.9,39.2,39.4,23,26.5,36.5,55.4,40.4,38.6,31.2,15.6,26.7,32.7,0,0,0
+[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,38.7,30.2,0,53.8,47.5,38.3,34.2,42,34.7,40.2,52.9,46.4,51.8,45.6,30.3,40.2,37.6,34.3,20.5,27,50,44.1,36.2,25.1,18.6,40,20.4,0,0,0
+[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,39.4,34.1,0,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,41.8,55.2,45.2,20.2,41.2,43.3,24.2,25,19,46.7,39,38.7,27.4,28.6,45.8,22.5,0,0,0
+[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,36.2,23.9,0,46.9,38.6,33.6,35.6,27.5,34.4,33,50.8,47.6,52.4,38.2,30.1,34.7,36.1,31.5,27.3,24.6,44,37.8,38.2,20.9,33.5,19.2,28.6,0,0,0
+[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,37.4,34.9,0,46.9,42.5,32,32.3,27.7,29.7,29.9,48.3,35.2,60.9,50.4,24.2,42.2,37.6,32.1,27.3,40.1,56.5,37.6,38.7,25.3,24.4,39.2,30.6,0,0,0
+[Otter](https://github.com/Luodian/Otter),LLaMA-7B,36.4,36.6,0,45.9,39.7,31.9,31.6,26.4,32,33,49.2,39.3,59.7,53,23.6,41.2,36.1,37.3,22,27.4,46.7,36.6,37.9,26,24.8,42.5,30.6,0,0,0
+[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,37.3,35.5,0,46.7,42.3,31.7,33.4,27.4,29.8,29.9,47.7,35.6,60.3,49.8,24.2,42.2,39,32.1,27.3,39.9,54.9,37.6,38.4,25.2,24.1,38.3,32.7,0,0,0
+[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,37.5,0,0,45.2,38.5,29.3,33,29.7,35.5,39.2,52,48.7,58.5,46.4,24.2,41.2,40.1,39.7,23.5,29.1,52.2,41.9,38.2,18.8,20.3,0,0,0,0,0
+[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,34.4,38.6,0,41.7,35.5,31.8,29.5,36.2,32,32,51.1,35.2,39.4,36.4,25,36.2,31.1,20.6,22.7,41.5,59.2,40.4,29.7,26.3,24.1,42.5,34.7,0,0,0
+[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,39.4,28.9,0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,49.2,70.9,49.6,23.2,44.2,44,32.5,23.5,33.5,54.9,42,37.8,18.3,19.3,29.2,28.6,0,0,0
+[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder only 1.3B,46.3,23.3,0,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,68.1,82.1,51.4,21.2,48.2,43.7,30.7,28,25.2,42.8,48.5,40.8,39.5,30,24.2,22.5,0,0,0
+[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,43.1,35.5,0,56.5,47.6,54.8,46.9,54.2,40.3,55.7,55,47.4,62.4,55.6,25.2,43.7,41.2,20.6,28.8,34.3,47.2,39.7,42.8,29.6,19.1,42.5,28.6,0,0,0
+[LLaVA-1.5](https://github.com/haotian-liu/LLaVA),vicuna-7B,47.3,30.8,0,63.7,62.4,66.7,51.3,60.2,38.5,47.4,59.8,69,60.6,49.8,25,45.7,56.7,31.1,24.2,35.7,50.3,46.1,39.4,29.4,28.1,39.2,22.5,0,0,0
+[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,38,40.3,0,48.2,38.2,37.8,32.9,29,32.4,37.1,54.1,45.5,52.4,52.8,22.6,42.7,33.2,26.6,21.2,56.5,48.4,42.7,38.6,23.6,20.5,45.8,34.7,0,0,0
+[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,59.2,32.1,0,74.8,70.5,67.6,60.5,55.3,53.4,76.3,76.1,61.4,86.1,78,27.2,60.3,84.8,68.9,25.8,47.7,56.6,58.6,49.9,37.6,24.9,27.5,36.7,0,0,0
+[Emu](https://github.com/baaivision/Emu),LLaMA-13B,42.5,41.1,41.4,59,50,43.7,37.1,44.3,33.6,49.5,58.3,61.4,68.8,61.6,19,45.7,41.5,24.2,26.4,29.3,37.1,41.9,42.7,37.9,21.8,51.7,30.6,46.8,43.2,34.2
+[Next-GPT](https://github.com/NExT-GPT/NExT-GPT),vicuna-7B,30.7,35.6,33.9,36.4,35.1,25.6,29.9,36.1,30.9,39.2,41.7,31,30.9,27.4,21.2,34.2,31.8,24.4,17.4,24.2,39,35.5,33.8,25.6,24.5,46.7,24.5,45.1,19.8,36.7
+[seed-llama](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13B,43.9,43.4,52.3,64,55,51.3,45.4,43.3,37.9,56.7,59.2,57,55.5,52.8,18.8,49.3,44.8,28.8,24.4,29.5,41.5,46.7,39.4,43.9,20.3,54.2,32.7,50.2,40.7,65.8
+[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,37,35.3,0,44.3,40.7,32.2,36.9,32.9,32.6,42.3,51.1,45.7,35.2,46.8,20.6,43.2,39.4,34.3,19.7,30.3,51.6,41.5,34,30.6,27.4,40,30.6,0,0,0
+[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,36.4,31,0,44.1,37,35.8,30.7,44.2,31.1,29.9,49.9,39.8,49.7,40.6,22,33.2,37.2,22.4,25,46.1,61.4,42.6,32.2,27,19,37.5,24.5,0,0,0
+[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,34.5,32.2,0,45.3,36.4,33.7,30.6,27.1,31.5,35.1,52,35.2,44.9,43.4,23.8,33.2,37.2,26,22.7,37.1,52.2,31.5,32.1,21.9,26.5,35.8,28.6,0,0,0
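For reference, the default v2 view that get_baseline_v2_df builds from this file can be reproduced directly with pandas; a small sketch (assumes it is run from the Space root so the relative path resolves):

import pandas as pd

# Load the new v2 results and reproduce the leaderboard's default ordering.
df = pd.read_csv("./file/result_v2.csv")
df = df.sort_values(by="Avg. P1", ascending=False)

# Default columns: model info plus the three part averages.
print(df[["Model", "Language Model", "Avg. P1", "Avg. P2", "Avg. P3"]].head())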
src/__pycache__/utils_display.cpython-38.pyc
CHANGED
Binary files a/src/__pycache__/utils_display.cpython-38.pyc and b/src/__pycache__/utils_display.cpython-38.pyc differ
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc
CHANGED
Binary files a/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc and b/src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc differ