Spaces:
Running
Running
add-new-thaiexam2-add-dash-to-no-score-model
Browse files
- src/leaderboard/read_evals.py +1 -1
- src/populate.py +1 -0
src/leaderboard/read_evals.py
CHANGED
@@ -31,6 +31,7 @@ DATASET_TO_NAME_MAPPING = {
|
|
31 |
"Knowledge III": "MT-Bench: Knowledge",
|
32 |
'Social Science': 'MT-Bench: Social Science',
|
33 |
'thaiexam_qa': "Thai Exam",
|
|
|
34 |
|
35 |
'lr_sum_tha_seacrowd_t2t': '',
|
36 |
'ntrex_128_eng-US_tha_seacrowd_t2t': '',
|
@@ -156,7 +157,6 @@ class EvalResult:
|
|
156 |
for k in list(data_dict.keys()):
|
157 |
if k not in [AutoEvalColumn.model.name, 'Average ⬆️']:
|
158 |
data_dict.move_to_end(k)
|
159 |
-
|
160 |
return data_dict
|
161 |
|
162 |
|
|
|
31 |
"Knowledge III": "MT-Bench: Knowledge",
|
32 |
'Social Science': 'MT-Bench: Social Science',
|
33 |
'thaiexam_qa': "Thai Exam",
|
34 |
+
"thaiexam2_qa": "Thai Exam2",
|
35 |
|
36 |
'lr_sum_tha_seacrowd_t2t': '',
|
37 |
'ntrex_128_eng-US_tha_seacrowd_t2t': '',
|
|
|
157 |
for k in list(data_dict.keys()):
|
158 |
if k not in [AutoEvalColumn.model.name, 'Average ⬆️']:
|
159 |
data_dict.move_to_end(k)
|
|
|
160 |
return data_dict
|
161 |
|
162 |
|
src/populate.py
CHANGED
@@ -13,6 +13,7 @@ def get_leaderboard_df(results_path: str) -> pd.DataFrame:
|
|
13 |
all_data_json = [v.to_dict() for v in raw_data]
|
14 |
|
15 |
df = pd.DataFrame.from_records(all_data_json)
|
|
|
16 |
df = df.round(decimals=2)
|
17 |
df = df.sort_values(by='Average ⬆️', ascending=False)
|
18 |
return raw_data, df
|
|
|
13 |
all_data_json = [v.to_dict() for v in raw_data]
|
14 |
|
15 |
df = pd.DataFrame.from_records(all_data_json)
|
16 |
+
df = df.fillna('-')
|
17 |
df = df.round(decimals=2)
|
18 |
df = df.sort_values(by='Average ⬆️', ascending=False)
|
19 |
return raw_data, df
|