Spaces:

opencompass
/

openvlm_video_leaderboard

Running

App Files Community

nebulae09 committed on 21 days ago

Commit

5ab1442

•

1 Parent(s): 8e5bfdb

update code with MLVU and TempCompass

Browse files

Files changed (2) hide show

app.py +2 -2
lb_info.py +42 -8

app.py CHANGED Viewed

@@ -8,8 +8,8 @@ with gr.Blocks() as demo:
     EVAL_TIME = format_timestamp(timestamp)
     results = struct['results']
     N_MODEL = len(results)
-    N_DATA = len(results['Video-LLaVA']) - 1
-    DATASETS = list(results['Video-LLaVA'])
     DATASETS.remove('META')
     print(DATASETS)

     EVAL_TIME = format_timestamp(timestamp)
     results = struct['results']
     N_MODEL = len(results)
+    N_DATA = len(results['Video-LLaVA-7B']) - 1
+    DATASETS = list(results['Video-LLaVA-7B'])
     DATASETS.remove('META')
     print(DATASETS)

lb_info.py CHANGED Viewed

@@ -36,7 +36,7 @@ This leaderboard was last updated: {}.
 """
 # CONSTANTS-FIELDS
 META_FIELDS = ['Method', 'Parameters (B)', 'Language Model', 'Vision Model', 'OpenSource', 'Verified', 'Frames']
-MAIN_FIELDS = ['MVBench', 'Video-MME (w/o subs)', 'MMBench-Video']
 MODEL_SIZE = ['<10B', '10B-20B', '20B-40B', '>40B', 'Unknown']
 MODEL_TYPE = ['API', 'OpenSource']
@@ -134,13 +134,41 @@ def BUILD_L1_DF(results, fields):
                 res[k].append(meta[k])
         scores, ranks = [], []
         for d in fields:
-            res[d].append(item[d]['Overall'])
-            # scores.append(item[d]['Overall'])
             if d == 'MMBench-Video':
                 scores.append(item[d]['Overall'] / 3 * 100)
             else:
                 scores.append(item[d]['Overall'])
-            ranks.append(nth_large(item[d]['Overall'], [x[d]['Overall'] for x in results.values() if 'Overall' in x[d]]))
         res['Avg Score'].append(round(np.mean(scores), 1))
         res['Avg Rank'].append(round(np.mean(ranks), 2))
@@ -160,13 +188,13 @@ def BUILD_L1_DF(results, fields):
 def BUILD_L2_DF(results, dataset):
     res = defaultdict(list)
     fields = list(list(results.values())[0][dataset].keys())
-    non_overall_fields = [x for x in fields if 'Overall' not in x]
-    overall_fields = [x for x in fields if 'Overall' in x]
     for m in results:
         item = results[m]
         meta = item['META']
-        if item[dataset] == {}:
             continue
         for k in META_FIELDS:
             if k == 'Parameters (B)':
@@ -186,7 +214,12 @@ def BUILD_L2_DF(results, dataset):
             res[d].append(item[dataset][d])
     df = pd.DataFrame(res)
-    df = df.sort_values('Overall')
     df = df.iloc[::-1]
     check_box = {}
@@ -202,4 +235,5 @@ def BUILD_L2_DF(results, dataset):
     type_map['Method'] = 'html'
     type_map['Language Model'] = type_map['Vision Model'] = type_map['OpenSource'] = type_map['Verified'] = type_map['Frames'] ='str'
     check_box['type_map'] = type_map
     return df, check_box

 """
 # CONSTANTS-FIELDS
 META_FIELDS = ['Method', 'Parameters (B)', 'Language Model', 'Vision Model', 'OpenSource', 'Verified', 'Frames']
+MAIN_FIELDS = ['MVBench', 'Video-MME (w/o subs)', 'MMBench-Video', 'TempCompass', 'MLVU']
 MODEL_SIZE = ['<10B', '10B-20B', '20B-40B', '>40B', 'Unknown']
 MODEL_TYPE = ['API', 'OpenSource']
                 res[k].append(meta[k])
         scores, ranks = [], []
         for d in fields:
+            # if d == 'MLVU':
+            #     item[d]['Overall'] = item[d]['M-Avg'] * 0.84 + item[d]['G-Avg'] * 10 * 0.16
+            # elif d == 'TempCompass':
+            #     item[d]['Overall'] = item[d]['overall']
+            if d == 'MLVU':
+                res[d].append(
+                    f'M-Avg: {item[d]["M-Avg"]}, G-Avg: {item[d]["G-Avg"]}'
+                    # {
+                    #     'M-Avg': item[d]['M-Avg'],
+                    #     'G-Avg': item[d]['G-Avg']
+                    # }
+                )
+            elif d == 'TempCompass':
+                res[d].append(item[d]['overall'])
+            else:
+                res[d].append(item[d]['Overall'])
             if d == 'MMBench-Video':
                 scores.append(item[d]['Overall'] / 3 * 100)
+            elif d == 'TempCompass':
+                scores.append(item[d]['overall'])
+            elif d == 'MLVU':
+                scores.append(item[d]['M-Avg'] * 0.84 + item[d]['G-Avg'] * 10 * 0.16)
             else:
                 scores.append(item[d]['Overall'])
+            if d == 'MLVU':
+                ranks.append(nth_large(
+                    item[d]['M-Avg'] * 0.84 + item[d]['G-Avg'] * 10 * 0.16,
+                    [x[d]['M-Avg'] * 0.84 + x[d]['G-Avg'] * 10 * 0.16 for x in results.values() if d in x and 'M-Avg' in x[d] and 'G-Avg' in x[d]]
+                ))
+            elif d == 'TempCompass':
+                ranks.append(nth_large(item[d]['overall'], [x[d]['overall'] for x in results.values() if d in x and 'overall' in x[d]]))
+            else:
+                ranks.append(nth_large(item[d]['Overall'], [x[d]['Overall'] for x in results.values() if d in x and 'Overall' in x[d]]))
         res['Avg Score'].append(round(np.mean(scores), 1))
         res['Avg Rank'].append(round(np.mean(ranks), 2))
 def BUILD_L2_DF(results, dataset):
     res = defaultdict(list)
     fields = list(list(results.values())[0][dataset].keys())
+    non_overall_fields = [x for x in fields if 'Overall' not in x and 'Avg' not in x and 'overall' not in x]
+    overall_fields = [x for x in fields if 'Overall' in x or 'Avg' in x or 'overall' in x]
     for m in results:
         item = results[m]
         meta = item['META']
+        if dataset not in item or item[dataset] == {}:
             continue
         for k in META_FIELDS:
             if k == 'Parameters (B)':
             res[d].append(item[dataset][d])
     df = pd.DataFrame(res)
+    if dataset == 'MLVU':
+        df = df.sort_values('M-Avg')
+    elif dataset == 'TempCompass':
+        df = df.sort_values('overall')
+    else:
+        df = df.sort_values('Overall')
     df = df.iloc[::-1]
     check_box = {}
     type_map['Method'] = 'html'
     type_map['Language Model'] = type_map['Vision Model'] = type_map['OpenSource'] = type_map['Verified'] = type_map['Frames'] ='str'
     check_box['type_map'] = type_map
+    # print(check_box, dataset, df.columns)
     return df, check_box