Spaces: SEED Benchmark
__pycache__/constants.cpython-38.pyc
ADDED
Binary file (6.35 kB)
app.py
CHANGED
@@ -126,6 +126,9 @@ def add_new_eval(
         model_type,
         model_name,
         LLM_name,
+        overall_accuracy,
+        average_accuracy_image,
+        average_accuracy_video,
         each_task_accuracy[1],
         each_task_accuracy[2],
         each_task_accuracy[3],
@@ -135,19 +138,25 @@ def add_new_eval(
         each_task_accuracy[7],
         each_task_accuracy[8],
         each_task_accuracy[9],
-        average_accuracy_image,
         each_task_accuracy[10],
         each_task_accuracy[11],
         each_task_accuracy[12],
-        average_accuracy_video,
-        overall_accuracy]
-    # pdb.set_trace()
+        ]
     csv_data.loc[col] = new_data
     csv_data = csv_data.to_csv(CSV_DIR, index=False)
     return 0
 
 def get_baseline_df():
+    # pdb.set_trace()
+    df = pd.read_csv(CSV_DIR)
+    df = df.sort_values(by="Avg. All", ascending=False)
+    present_columns = MODEL_INFO + checkbox_group.value
+    df = df[present_columns]
+    return df
+
+def get_all_df():
     df = pd.read_csv(CSV_DIR)
+    df = df.sort_values(by="Avg. All", ascending=False)
     return df
 
 block = gr.Blocks()
@@ -173,8 +182,8 @@ with block:
 
     # selection for column part:
     checkbox_group = gr.CheckboxGroup(
-        choices=
-        value=
+        choices=TASK_INFO_v2,
+        value=AVG_INFO,
         label="Select options",
         interactive=True,
     )
@@ -191,9 +200,9 @@ with block:
 
     def on_checkbox_group_change(selected_columns):
        # pdb.set_trace()
-        selected_columns = [item for item in
+        selected_columns = [item for item in TASK_INFO_v2 if item in selected_columns]
         present_columns = MODEL_INFO + selected_columns
-        updated_data =
+        updated_data = get_all_df()[present_columns]
         updated_headers = present_columns
         update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
 
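Taken together, the app.py changes split the leaderboard query into get_all_df (the full table sorted by "Avg. All") and get_baseline_df (default columns only), and drive column selection from TASK_INFO_v2. The sketch below shows how these pieces typically wire into a Gradio Blocks UI. It is a minimal reconstruction under assumptions: the gr.Dataframe output component and the .change() hookup are not part of the hunks above, so this is an illustration, not the app's exact code.

# Minimal sketch (assumed wiring, not the exact app.py code): a CheckboxGroup
# chooses which task columns to display, and its change event refreshes a
# Dataframe component with the matching slice of the leaderboard CSV.
import gradio as gr
import pandas as pd

from constants import AVG_INFO, CSV_DIR, MODEL_INFO, TASK_INFO_v2


def get_all_df():
    # Full leaderboard, sorted by the overall average column.
    df = pd.read_csv(CSV_DIR)
    return df.sort_values(by="Avg. All", ascending=False)


def on_checkbox_group_change(selected_columns):
    # Preserve the canonical TASK_INFO_v2 ordering regardless of click order.
    selected_columns = [c for c in TASK_INFO_v2 if c in selected_columns]
    present_columns = MODEL_INFO + selected_columns
    # Returning a DataFrame updates the Dataframe component's value.
    return get_all_df()[present_columns]


with gr.Blocks() as block:
    checkbox_group = gr.CheckboxGroup(
        choices=TASK_INFO_v2,
        value=AVG_INFO,
        label="Select options",
        interactive=True,
    )
    data_component = gr.Dataframe(
        value=get_all_df()[MODEL_INFO + AVG_INFO],
        interactive=False,
    )
    checkbox_group.change(
        fn=on_checkbox_group_change,
        inputs=checkbox_group,
        outputs=data_component,
    )

# block.launch()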
constants.py
CHANGED
@@ -1,11 +1,15 @@
 # this is .py for store constants
 MODEL_INFO = ["Model Type", "Model", "Language Model"]
 TASK_INFO = ["Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Avg. Img", "Action Recognition", "Action Prediction", "Procedure Understanding", "Avg. Video", "Avg. All"]
+TASK_INFO_v2 = ["Avg. All", "Avg. Img", "Avg. Video", "Scene Understanding", "Instance Identity", "Instance Attributes", "Instance Localization", "Instance Counting", "Spatial Relation", "Instance Interaction", "Visual Reasoning", "Text Recognition", "Action Recognition", "Action Prediction", "Procedure Understanding"]
+
 AVG_INFO = ["Avg. Img", "Avg. Video", "Avg. All"]
 DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
 CSV_DIR = "./file/result.csv"
 
-COLUMN_NAMES = MODEL_INFO + TASK_INFO
+# COLUMN_NAMES = MODEL_INFO + TASK_INFO
+COLUMN_NAMES = MODEL_INFO + TASK_INFO_v2
+
 DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
 
 UNTUNED_MODEL_RESULTS = '''LLM & Flan-T5 & Flan-T5-XL &23.0 &29.0 &32.8 &31.8 &20.5 &31.8 &33.0 &18.2 &19.4 &23.2 &34.9 &25.4 \\
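A quick sanity check on the reordered constants (a sketch assuming constants.py imports exactly as shown above): MODEL_INFO contributes 3 columns and TASK_INFO_v2 contributes 15, which matches the 18 entries in DATA_TITILE_TYPE (3 "markdown" columns for the linked model names, 15 "number" columns for the scores).

# Sanity check: column names and per-column display types stay aligned.
from constants import COLUMN_NAMES, DATA_TITILE_TYPE, MODEL_INFO, TASK_INFO_v2

assert COLUMN_NAMES == MODEL_INFO + TASK_INFO_v2
assert len(COLUMN_NAMES) == len(DATA_TITILE_TYPE) == 18
assert DATA_TITILE_TYPE[:3] == ["markdown"] * 3   # Model Type / Model / Language Model
assert DATA_TITILE_TYPE[3:] == ["number"] * 15    # all score columns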
file/result.csv
CHANGED
@@ -1,20 +1,20 @@
-Model Type,Model,Language Model,Scene Understanding,Instance Identity,Instance Attributes,Instance Localization,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,
-LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,23.0,29.0,32.8,31.8,20.5,31.8,33.0,18.2,19.4,
-LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,
-LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37.0,9.0,
-ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,
-ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,
-ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,
-ImageLLM,[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,42.7,34.9,33.5,28.4,41.9,30.8,27.8,46.8,27.7,
-ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Flan-T5-XL,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,
-ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,51.9,44.1,39.9,36.1,33.7,36.4,32.0,53.2,30.6,
-ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,
-ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,44.9,38.6,32.2,30.9,26.3,31.8,32.0,51.4,31.8,
-ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20.0,
-ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,45.2,38.5,29.3,33.0,29.7,35.5,39.2,52.0,24.7,
-ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,41.7,35.5,31.8,29.5,36.2,32.0,32.0,51.1,27.1,
-ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,
-ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,63.4,57.1,58.5,44.0,41.4,37.9,55.7,60.7,25.9,
-VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,47.1,43.8,34.9,40.0,32.8,34.6,42.3,50.5,17.7,
-VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,
-VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,
+Model Type,Model,Language Model,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attributes,Instance Localization,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
+LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,27.7,27.3,28.6,23.0,29.0,32.8,31.8,20.5,31.8,33.0,18.2,19.4,23.2,34.9,25.4
+LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
+LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37.0,9.0,33.0,23.1,26.2
+ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24.0
+ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
+ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
+ImageLLM,[LLaVA](https://github.com/haotian-liu/LLaVA),LLaMA-7B,33.5,37.0,23.8,42.7,34.9,33.5,28.4,41.9,30.8,27.8,46.8,27.7,29.7,21.4,19.1
+ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Flan-T5-XL,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
+ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32.0,53.2,30.6,39.5,24.3,31.9
+ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24.0
+ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32.0,51.4,31.8,37.9,27.2,24.8
+ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20.0,37.2,25.4,24.2
+ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,32.7,35.2,25.8,45.2,38.5,29.3,33.0,29.7,35.5,39.2,52.0,24.7,38.6,18.5,19.6
+ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32.0,32.0,51.1,27.1,33.9,25.4,23.0
+ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,34.0,37.9,23.0,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
+ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,50.0,54.4,37.5,63.4,57.1,58.5,44.0,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27.0
+VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,37.6,39.0,33.7,47.1,43.8,34.9,40.0,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
+VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
+VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,30.3,32.0,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
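The rewritten result.csv moves the three averages (Avg. All, Avg. Img, Avg. Video) right after the model-info columns and appends the three video tasks. The averages appear to be DATA_NUM-weighted means of the per-task scores; that weighting is inferred by recomputing the stored values, not stated anywhere in this commit, so the check below is a hedged sketch for the BLIP-2 row only.

# Hedged check: recompute BLIP-2's averages as DATA_NUM-weighted means and
# compare against the stored Avg. Img / Avg. All values (weighting is inferred).
import numpy as np
import pandas as pd

from constants import CSV_DIR, DATA_NUM, TASK_INFO_v2

IMG_TASKS = TASK_INFO_v2[3:12]    # 9 image tasks
VIDEO_TASKS = TASK_INFO_v2[12:]   # 3 video tasks

df = pd.read_csv(CSV_DIR)
row = df[df["Model"].str.contains("BLIP-2")].iloc[0]

avg_img = np.average(row[IMG_TASKS].astype(float), weights=DATA_NUM[:9])
avg_all = np.average(row[IMG_TASKS + VIDEO_TASKS].astype(float), weights=DATA_NUM)
print(round(avg_img, 1), row["Avg. Img"])  # expected: 49.7 vs stored 49.7
print(round(avg_all, 1), row["Avg. All"])  # expected: 46.4 vs stored 46.4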
src/__pycache__/utils_display.cpython-38.pyc
ADDED
Binary file (4.26 kB)

src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc
ADDED
Binary file (1.22 kB)