g8a9 committed on
Commit d2e5799 • 2 Parent(s): 3e2fdcf 45517cb

update latest results

Files changed (5)
  1. app.py +95 -83
  2. latest_results.tsv +18 -18
  3. src/about.py +19 -17
  4. src/display/utils.py +4 -1
  5. src/leaderboard/read_evals.py +15 -2
app.py CHANGED
@@ -123,6 +123,11 @@ def filter_models(
     return filtered_df


+shown_columns = [
+    c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden and not c.never_hidden
+]
+
+
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
@@ -130,57 +135,64 @@ with demo:

     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            with gr.Row():
-                with gr.Column():
-                    with gr.Row():
-                        search_bar = gr.Textbox(
-                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                            show_label=False,
-                            elem_id="search-bar",
-                        )
-                    with gr.Row():
-                        shown_columns = gr.CheckboxGroup(
-                            choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
-                            value=[
-                                c.name
-                                for c in fields(AutoEvalColumn)
-                                if c.displayed_by_default and not c.hidden and not c.never_hidden
-                            ],
-                            label="Select columns to show",
-                            elem_id="column-select",
-                            interactive=True,
-                        )
-                    with gr.Row():
-                        deleted_models_visibility = gr.Checkbox(
-                            value=False, label="Show gated/private/deleted models", interactive=True
-                        )
-                with gr.Column(min_width=320):
-                    # with gr.Box(elem_id="box-filter"):
-                    filter_columns_type = gr.CheckboxGroup(
-                        label="Model types",
-                        choices=[t.to_str() for t in ModelType],
-                        value=[t.to_str() for t in ModelType],
-                        interactive=True,
-                        elem_id="filter-columns-type",
-                    )
-                    filter_columns_precision = gr.CheckboxGroup(
-                        label="Precision",
-                        choices=[i.value.name for i in Precision],
-                        value=[i.value.name for i in Precision],
-                        interactive=True,
-                        elem_id="filter-columns-precision",
-                    )
-                    filter_columns_size = gr.CheckboxGroup(
-                        label="Model sizes (in billions of parameters)",
-                        choices=list(NUMERIC_INTERVALS.keys()),
-                        value=list(NUMERIC_INTERVALS.keys()),
-                        interactive=True,
-                        elem_id="filter-columns-size",
-                    )
+            # with gr.Row():
+            # # with gr.Column():
+            # # with gr.Row():
+            # search_bar = gr.Textbox(
+            # placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+            # show_label=False,
+            # elem_id="search-bar",
+            # )
+
+            # # with gr.Row():
+            # # shown_columns = gr.CheckboxGroup(
+            # # choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
+            # # value=[
+            # # c.name
+            # # for c in fields(AutoEvalColumn)
+            # # if c.displayed_by_default and not c.hidden and not c.never_hidden
+            # # ],
+            # # label="Select columns to show",
+            # # elem_id="column-select",
+            # # interactive=True,
+            # # )
+            # # with gr.Row():
+            # # deleted_models_visibility = gr.Checkbox(
+            # # value=False, label="Show gated/private/deleted models", interactive=True
+            # # )
+            # # with gr.Column(min_width=320):
+            # # with gr.Box(elem_id="box-filter"):
+            # filter_columns_type = gr.CheckboxGroup(
+            # label="Model types",
+            # choices=[t.to_str() for t in ModelType],
+            # value=[t.to_str() for t in ModelType],
+            # interactive=True,
+            # elem_id="filter-columns-type",
+            # )
+            # # filter_columns_precision = gr.CheckboxGroup(
+            # # label="Precision",
+            # # choices=[i.value.name for i in Precision],
+            # # value=[i.value.name for i in Precision],
+            # # interactive=True,
+            # # elem_id="filter-columns-precision",
+            # # )
+            # filter_columns_size = gr.CheckboxGroup(
+            # label="Model sizes (in billions of parameters)",
+            # choices=list(NUMERIC_INTERVALS.keys()),
+            # value=list(NUMERIC_INTERVALS.keys()),
+            # interactive=True,
+            # elem_id="filter-columns-size",
+            # )

             leaderboard_table = gr.components.Dataframe(
-                value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                value=leaderboard_df[
+                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.displayed_by_default]
+                ].applymap(
+                    lambda x: x if isinstance(x, str) or isinstance(x, float) else round(x["value"], 2)
+                ), # ,# ] + shown_columns],
+                headers=[
+                    c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.displayed_by_default
+                ], ##, if c.never_hidden] + shown_columns,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 interactive=False,
@@ -194,40 +206,40 @@ with demo:
                 datatype=TYPES,
                 visible=False,
             )
-            search_bar.submit(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-            )
-            for selector in [
-                shown_columns,
-                filter_columns_type,
-                filter_columns_precision,
-                filter_columns_size,
-                deleted_models_visibility,
-            ]:
-                selector.change(
-                    update_table,
-                    [
-                        hidden_leaderboard_table_for_search,
-                        shown_columns,
-                        filter_columns_type,
-                        filter_columns_precision,
-                        filter_columns_size,
-                        deleted_models_visibility,
-                        search_bar,
-                    ],
-                    leaderboard_table,
-                    queue=True,
-                )
+            # search_bar.submit(
+            # update_table,
+            # [
+            # hidden_leaderboard_table_for_search,
+            # # None,
+            # filter_columns_type,
+            # # filter_columns_precision,
+            # filter_columns_size,
+            # # None,
+            # search_bar,
+            # ],
+            # leaderboard_table,
+            # )
+            # for selector in [
+            # # shown_columns,
+            # filter_columns_type,
+            # # filter_columns_precision,
+            # filter_columns_size,
+            # # deleted_models_visibility,
+            # ]:
+            # selector.change(
+            # update_table,
+            # [
+            # hidden_leaderboard_table_for_search,
+            # # None,
+            # filter_columns_type,
+            # # filter_columns_precision,
+            # filter_columns_size,
+            # # None,
+            # search_bar,
+            # ],
+            # leaderboard_table,
+            # queue=True,
+            # )

         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
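The new `value=` expression above assumes each score cell now arrives as a dict of the form `{'value': ..., 'category': ...}` (see the read_evals.py change below) and flattens it to a rounded number for display. A minimal sketch of that unwrapping on a toy frame, with made-up column values (`DataFrame.applymap` is the same call used in the diff; newer pandas spells it `DataFrame.map`):

```python
# Illustrative only: mimic the applymap unwrapping from app.py on a tiny frame.
import pandas as pd

df = pd.DataFrame(
    {
        "Model": ["model-a"],
        "ARC-C": [{"value": 56.484641638225256, "category": "CFK"}],
        "XCOPA": [{"value": 73.4, "category": "CFK"}],
    }
)

# Strings and floats pass through untouched; dict cells are reduced to their
# rounded "value" field, exactly as in the Dataframe constructor above.
flat = df.applymap(lambda x: x if isinstance(x, str) or isinstance(x, float) else round(x["value"], 2))
print(flat)  # Model: model-a, ARC-C: 56.48, XCOPA: 73.4
```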
latest_results.tsv CHANGED
@@ -1,18 +1,18 @@
- eval_name Precision Type T Weight type Architecture Model Average ⬆️ Hub License #Params (B) Model sha Hub ❀️ Available on the hub Code Data AMI 2020 Agg AMI 2020 Miso ARC-C Belebele GeNTE Neutralizing HaSpeeDe2 HS HaSpeeDe2 Stereo HateCheck HONEST IronITA Irony IronITA Sarcasm ItaCoLA News Sum SENTIPOLC SQuAD it TruthfulQA XCOPA
- 0 swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>" 59.925692200303324 0 True ? ? 49.61934617107031 73.58604698054239 56.484641638225256 83.55555555555556 33.8255033557047 72.24399819126907 61.627116844508144 80.51511613552358 100.0 67.79529918401192 46.19514665929917 0.24261234404280246 33.783978293075634 46.49499761664646 71.27317142821833 68.09423700746308 73.4
- 9 mistralai_Mistral-7B-Instruct-v0.2_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai/Mistral-7B-Instruct-v0.2</a>" 57.57154925481929 0 True ? ? 61.95096430524839 66.42194008585093 44.36860068259386 67.22222222222223 29.12751677852349 71.07491292799637 67.27017961567233 78.40873056250285 100.0 59.16469471738617 55.53851376330874 0.27708420363666786 36.377962201593874 50.02052664310759 68.04841543730598 59.24407318497844 64.2
- 2 mii-community_zefiro-7b-dpo-ITA_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-dpo-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-dpo-ITA</a>" 55.96825697198048 0 True ? ? 59.97920997920998 66.14027143881808 44.19795221843004 65.88888888888889 29.798657718120808 66.93068606112085 61.46209894750329 82.83622905315102 100.0 58.523449206965395 54.918191698733956 0.22337556862808253 35.66642647158017 38.80971929318383 74.34293876621986 43.34227321311386 68.4
- 7 meta-llama_Meta-Llama-3-8B_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Meta-Llama-3-8B</a>" 55.933099551030125 0 True ? ? 60.02710027100271 63.14678395603251 40.529010238907844 76.0 29.53020134228188 65.30297764359561 59.541073390095356 79.81131536880565 100.0 57.31801541230962 56.750548188367965 0.2786244415689118 32.93607461627173 39.93136214294286 76.49082768675667 42.06877766857276 71.2
- 10 mii-community_zefiro-7b-base-ITA_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-base-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-base-ITA</a>" 55.286768709834995 0 True ? ? 60.14362403797995 64.54082375784897 40.955631399317404 58.55555555555556 28.456375838926174 66.12858980217781 59.74063711314884 82.46753086246828 100.0 59.05311714498798 57.8863223808017 0.09963712635854956 34.19887652648641 39.18986054178559 75.6692177776856 46.18926820166605 66.60000000000001
- 15 mii-community_zefiro-7b-sft-ITA_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-sft-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-sft-ITA</a>" 55.135348610310785 0 True ? ? 60.458679319889285 63.51256529535591 42.32081911262799 67.77777777777779 27.248322147651006 65.72752014372092 60.158604473839915 83.05031763559394 100.0 52.69566548195397 51.630329924754 0.08940878967203518 34.80608014621687 43.75098014181036 74.55382319645513 42.52003278796414 67.0
- 1 mistralai_Mistral-7B-v0.1_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai/Mistral-7B-v0.1</a>" 54.550973703693096 0 True ? ? 60.52050697114497 63.66158365032981 41.21160409556314 65.66666666666666 29.53020134228188 60.38816689466484 57.907599364752336 80.59264657366079 100.0 55.23299236027556 55.67900219124808 0.131895692851752 34.09475870496535 38.87141003943634 75.08500650762954 43.19251190731156 65.60000000000001
- 14 swap-uniba_LLaMAntino-2-chat-13b-hf-ITA_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-chat-13b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-chat-13b-hf-ITA</a>" 53.88562700961127 0 True ? ? 61.41230947327803 64.77739009492042 39.07849829351536 60.44444444444444 25.503355704697988 67.1548291501024 59.101414060364085 81.83763297921335 100.0 57.92048929663609 52.2777996043644 0.1015435288181161 23.81691473597593 34.69232896418751 73.10003377486571 44.43667505800782 70.39999999999999
- 6 swap-uniba_LLaMAntino-2-13b-hf-ITA_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-13b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-13b-hf-ITA</a>" 51.26001015437534 0 True ? ? 56.79723502304148 60.93495016444478 38.56655290102389 52.33333333333333 24.697986577181208 57.1976786986929 54.2447910290625 68.16391542846057 100.0 56.51605280366516 51.571111501558086 0.16387751408972254 23.495330157527007 38.60258050721683 74.20709928774112 42.12767769734222 71.8
- 13 meta-llama_Llama-2-13b-hf_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-13b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Llama-2-13b-hf</a>" 51.16172039685661 0 True ? ? 53.24565637065637 59.32319654843206 39.93174061433447 49.666666666666664 24.295302013422816 54.13600451447075 54.88702987697715 74.1483219663718 100.0 50.34584608393744 49.636673785442774 0.11758183179468357 35.09699883531247 37.37259554778931 75.22840229480128 42.91722979615231 69.39999999999999
- 5 g8a9_tweety-mistral-7b_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/g8a9/tweety-mistral-7b"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">g8a9/tweety-mistral-7b</a>" 48.37800669811661 0 True ? ? 56.17170479302832 56.423255312264054 37.96928327645051 49.666666666666664 27.91946308724832 53.70259637851317 53.57434872305199 64.41588573083048 100.0 50.21506876304183 49.42973129711966 0.11006633622278786 18.81035591897043 28.46426204947685 64.39794432633592 37.75548120876122 73.4
- 4 meta-llama_Llama-2-7b-hf_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-7b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Llama-2-7b-hf</a>" 47.26821759114118 0 True ? ? 50.26836062232489 57.089775606014214 35.153583617747444 36.11111111111111 25.100671140939596 49.33536331841416 51.73318260900284 67.35406316275402 100.0 47.63910390674802 48.347086153434084 0.036528464070504335 33.756452251726735 27.82288694076669 68.6449557225095 39.16657442183614 66.0
- 8 swap-uniba_LLaMAntino-2-7b-hf-ITA_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-7b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-7b-hf-ITA</a>" 45.2518617559276 0 True ? ? 51.11111111111111 53.267951636107355 33.70307167235495 34.66666666666667 24.295302013422816 45.514286626950536 47.59019966407009 60.855425171736485 100.0 47.55193616643805 46.04838972288254 0.043130721156949686 24.582547279426233 22.260015178994326 69.30864535653794 40.48297086291322 68.0
- 12 sapienzanlp_Minerva-3B-base-v1.0_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-3B-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-3B-base-v1.0</a>" 41.485187669928465 0 True ? ? 49.875480140137604 52.15633707230505 30.97269624573379 24.333333333333336 23.08724832214765 48.93622623624203 45.71528801169143 47.43110547988597 100.0 43.13118956315911 45.77114427860697 -0.015363788820154219 21.8700732759084 23.020245154283693 42.99174436502196 37.371442699146954 68.60000000000001
- 3 swap-uniba_LLaMAntino-2-chat-7b-hf-ITA_? ? ? Unknown LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-chat-7b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-chat-7b-hf-ITA</a>" 38.997012161113425 0 True ? ? 47.32809806550469 43.776841477788466 29.180887372013654 28.111111111111107 23.48993288590604 41.57668822526659 41.556830771361305 44.984357634264406 100.0 41.716872329343005 43.53102430893341 -0.02574637563194932 8.269309204888462 9.339380225529704 58.43272201840739 39.880897484241935 61.8
- 16 sapienzanlp_Minerva-1B-base-v1.0_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-1B-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-1B-base-v1.0</a>" 38.906733116823304 0 True ? ? 50.76172656624852 53.84641914146224 24.573378839590443 22.666666666666664 26.57718120805369 48.25128927047713 44.581537708222804 50.10425395808837 100.0 46.49541549308013 45.46046920890855 0.022249590030925144 14.27287574762189 16.571464690513597 17.48160254077023 39.747932356260876 60.0
- 11 sapienzanlp_Minerva-350M-base-v1.0_? ? ? Unknown MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-350M-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-350M-base-v1.0</a>" 36.95204565967993 0 True ? ? 45.17543859649123 35.72145622912868 24.40273037542662 22.88888888888889 52.75167785234899 41.92832319168979 40.67042217927179 46.277755136438564 100.0 36.23277134884009 43.223117993157416 -0.036868413829916326 10.308018221966565 23.388373345290127 4.903980027793706 43.7486912416563 56.599999999999994
 
+ eval_name Precision Type T Weight type Architecture Model Avg NLU Avg CFK Avg BFS Avg ⬆️ Hub License #Params (B) Model sha Hub ❀️ Available on the hub Code Data AMI 2020 Agg AMI 2020 Miso ARC-C Belebele GeNTE Neutralizing HaSpeeDe2 HS HaSpeeDe2 Stereo HateCheck HONEST IronITA Irony IronITA Sarcasm ItaCoLA News Sum SENTIPOLC SQuAD it TruthfulQA XCOPA Hellaswag-it
+ 6 swap-uniba_LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA</a>" 50.15912285053053 66.8637539701687 69.6423469054011 62.22174124203344 Llama 3 Community License Agreement 8.0 0 True πŸ‘ πŸ‘ {'value': 49.61934617107031, 'category': 'NLU'} {'value': 73.58604698054239, 'category': 'NLU'} {'value': 56.484641638225256, 'category': 'CFK'} {'value': 83.55555555555556, 'category': 'NLU'} {'value': 33.8255033557047, 'category': 'BFS'} {'value': 72.24399819126907, 'category': 'BFS'} {'value': 61.627116844508144, 'category': 'BFS'} {'value': 80.51511613552358, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 67.79529918401192, 'category': 'NLU'} {'value': 46.19514665929917, 'category': 'NLU'} {'value': 0.24261234404280246, 'category': 'NLU'} {'value': 33.783978293075634, 'category': 'NLU'} {'value': 46.49499761664646, 'category': 'NLU'} {'value': 71.27317142821833, 'category': 'CFK'} {'value': 68.09423700746308, 'category': 'CFK'} {'value': 73.4, 'category': 'CFK'} {'value': 65.06671977693686, 'category': 'CFK'}
+ 13 mistralai_Mistral-7B-Instruct-v0.2_bfloat16 bfloat16 fine-tuned πŸ”Ά Delta MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai/Mistral-7B-Instruct-v0.2</a>" 49.62173851779433 58.69183546703023 69.176267976939 59.163280653921184 Apache 2.0 7.0 0 True πŸ™ˆ πŸ™ˆ {'value': 61.95096430524839, 'category': 'NLU'} {'value': 66.42194008585093, 'category': 'NLU'} {'value': 44.36860068259386, 'category': 'CFK'} {'value': 67.22222222222223, 'category': 'NLU'} {'value': 29.12751677852349, 'category': 'BFS'} {'value': 71.07491292799637, 'category': 'BFS'} {'value': 67.27017961567233, 'category': 'BFS'} {'value': 78.40873056250285, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 59.16469471738617, 'category': 'NLU'} {'value': 55.53851376330874, 'category': 'NLU'} {'value': 0.27708420363666786, 'category': 'NLU'} {'value': 36.377962201593874, 'category': 'NLU'} {'value': 50.02052664310759, 'category': 'NLU'} {'value': 68.04841543730598, 'category': 'CFK'} {'value': 59.24407318497844, 'category': 'CFK'} {'value': 64.2, 'category': 'CFK'} {'value': 57.598088030272855, 'category': 'CFK'}
+ 8 mii-community_zefiro-7b-dpo-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-dpo-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-dpo-ITA</a>" 47.51869156825104 57.89491206679833 68.2055343559792 57.87304599700952 Apache 2.0 7.0 0 True πŸ™ˆ πŸ‘ {'value': 59.97920997920998, 'category': 'NLU'} {'value': 66.14027143881808, 'category': 'NLU'} {'value': 44.19795221843004, 'category': 'CFK'} {'value': 65.88888888888889, 'category': 'NLU'} {'value': 29.798657718120808, 'category': 'BFS'} {'value': 66.93068606112085, 'category': 'BFS'} {'value': 61.46209894750329, 'category': 'BFS'} {'value': 82.83622905315102, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 58.523449206965395, 'category': 'NLU'} {'value': 54.918191698733956, 'category': 'NLU'} {'value': 0.22337556862808253, 'category': 'NLU'} {'value': 35.66642647158017, 'category': 'NLU'} {'value': 38.80971929318383, 'category': 'NLU'} {'value': 74.34293876621986, 'category': 'CFK'} {'value': 43.34227321311386, 'category': 'CFK'} {'value': 68.4, 'category': 'CFK'} {'value': 59.191396136227844, 'category': 'CFK'}
+ 5 meta-llama_Meta-Llama-3-8B_bfloat16 bfloat16 pretrained 🟒 Original LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Meta-Llama-3-8B</a>" 48.29856362856205 57.42199318457142 66.8371135489557 57.51922345402972 Meta Llama 3 Community License 8.0 0 True πŸ™ˆ πŸ™ˆ {'value': 60.02710027100271, 'category': 'NLU'} {'value': 63.14678395603251, 'category': 'NLU'} {'value': 40.529010238907844, 'category': 'CFK'} {'value': 76.0, 'category': 'NLU'} {'value': 29.53020134228188, 'category': 'BFS'} {'value': 65.30297764359561, 'category': 'BFS'} {'value': 59.541073390095356, 'category': 'BFS'} {'value': 79.81131536880565, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 57.31801541230962, 'category': 'NLU'} {'value': 56.750548188367965, 'category': 'NLU'} {'value': 0.2786244415689118, 'category': 'NLU'} {'value': 32.93607461627173, 'category': 'NLU'} {'value': 39.93136214294286, 'category': 'NLU'} {'value': 76.49082768675667, 'category': 'CFK'} {'value': 42.06877766857276, 'category': 'CFK'} {'value': 71.2, 'category': 'CFK'} {'value': 56.8213503286198, 'category': 'CFK'}
+ 15 mii-community_zefiro-7b-base-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Delta MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-base-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-base-ITA</a>" 46.70847713397559 57.115645622716485 67.35862672334422 57.06091649334544 Apache 2.0 7.0 0 True πŸ™ˆ πŸ‘ {'value': 60.14362403797995, 'category': 'NLU'} {'value': 64.54082375784897, 'category': 'NLU'} {'value': 40.955631399317404, 'category': 'CFK'} {'value': 58.55555555555556, 'category': 'NLU'} {'value': 28.456375838926174, 'category': 'BFS'} {'value': 66.12858980217781, 'category': 'BFS'} {'value': 59.74063711314884, 'category': 'BFS'} {'value': 82.46753086246828, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 59.05311714498798, 'category': 'NLU'} {'value': 57.8863223808017, 'category': 'NLU'} {'value': 0.09963712635854956, 'category': 'NLU'} {'value': 34.19887652648641, 'category': 'NLU'} {'value': 39.18986054178559, 'category': 'NLU'} {'value': 75.6692177776856, 'category': 'CFK'} {'value': 46.18926820166605, 'category': 'CFK'} {'value': 66.60000000000001, 'category': 'CFK'} {'value': 56.164110734913365, 'category': 'CFK'}
+ 10 mii-community_zefiro-7b-sft-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-sft-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-sft-ITA</a>" 46.84018585967878 56.40022559897527 67.23695288016117 56.825788112938405 Apache 2.0 7.0 0 True πŸ™ˆ πŸ‘ {'value': 60.458679319889285, 'category': 'NLU'} {'value': 63.51256529535591, 'category': 'NLU'} {'value': 42.32081911262799, 'category': 'CFK'} {'value': 67.77777777777779, 'category': 'NLU'} {'value': 27.248322147651006, 'category': 'BFS'} {'value': 65.72752014372092, 'category': 'BFS'} {'value': 60.158604473839915, 'category': 'BFS'} {'value': 83.05031763559394, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 52.69566548195397, 'category': 'NLU'} {'value': 51.630329924754, 'category': 'NLU'} {'value': 0.08940878967203518, 'category': 'NLU'} {'value': 34.80608014621687, 'category': 'NLU'} {'value': 43.75098014181036, 'category': 'NLU'} {'value': 74.55382319645513, 'category': 'CFK'} {'value': 42.52003278796414, 'category': 'CFK'} {'value': 67.0, 'category': 'CFK'} {'value': 55.606452897829115, 'category': 'CFK'}
+ 11 mistralai_Mistral-7B-v0.1_bfloat16 bfloat16 pretrained 🟒 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai/Mistral-7B-v0.1</a>" 46.732352034614806 56.099282379017794 65.68372283507196 56.17178574956819 Apache 2.0 7.0 0 True πŸ™ˆ πŸ™ˆ {'value': 60.52050697114497, 'category': 'NLU'} {'value': 63.66158365032981, 'category': 'NLU'} {'value': 41.21160409556314, 'category': 'CFK'} {'value': 65.66666666666666, 'category': 'NLU'} {'value': 29.53020134228188, 'category': 'BFS'} {'value': 60.38816689466484, 'category': 'BFS'} {'value': 57.907599364752336, 'category': 'BFS'} {'value': 80.59264657366079, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 55.23299236027556, 'category': 'NLU'} {'value': 55.67900219124808, 'category': 'NLU'} {'value': 0.131895692851752, 'category': 'NLU'} {'value': 34.09475870496535, 'category': 'NLU'} {'value': 38.87141003943634, 'category': 'NLU'} {'value': 75.08500650762954, 'category': 'CFK'} {'value': 43.19251190731156, 'category': 'CFK'} {'value': 65.60000000000001, 'category': 'CFK'} {'value': 55.40728938458474, 'category': 'CFK'}
+ 9 swap-uniba_LLaMAntino-2-chat-13b-hf-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-chat-13b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-chat-13b-hf-ITA</a>" 44.43040251782813 56.03239812713 66.71944637887557 55.72741567461123 Llama 2 Community License 13.0 0 True πŸ™ˆ πŸ‘ {'value': 61.41230947327803, 'category': 'NLU'} {'value': 64.77739009492042, 'category': 'NLU'} {'value': 39.07849829351536, 'category': 'CFK'} {'value': 60.44444444444444, 'category': 'NLU'} {'value': 25.503355704697988, 'category': 'BFS'} {'value': 67.1548291501024, 'category': 'BFS'} {'value': 59.101414060364085, 'category': 'BFS'} {'value': 81.83763297921335, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 57.92048929663609, 'category': 'NLU'} {'value': 52.2777996043644, 'category': 'NLU'} {'value': 0.1015435288181161, 'category': 'NLU'} {'value': 23.81691473597593, 'category': 'NLU'} {'value': 34.69232896418751, 'category': 'NLU'} {'value': 73.10003377486571, 'category': 'CFK'} {'value': 44.43667505800782, 'category': 'CFK'} {'value': 70.39999999999999, 'category': 'CFK'} {'value': 53.146783509261105, 'category': 'CFK'}
+ 0 meta-llama_Llama-2-13b-hf_bfloat16 bfloat16 pretrained 🟒 Original LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-13b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Llama-2-13b-hf</a>" 41.85065195875397 56.39967689118707 61.493331674248495 53.24788684139651 Llama 2 Community License 13.0 0 True πŸ™ˆ πŸ™ˆ {'value': 53.24565637065637, 'category': 'NLU'} {'value': 59.32319654843206, 'category': 'NLU'} {'value': 39.93174061433447, 'category': 'CFK'} {'value': 49.666666666666664, 'category': 'NLU'} {'value': 24.295302013422816, 'category': 'BFS'} {'value': 54.13600451447075, 'category': 'BFS'} {'value': 54.88702987697715, 'category': 'BFS'} {'value': 74.1483219663718, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 50.34584608393744, 'category': 'NLU'} {'value': 49.636673785442774, 'category': 'NLU'} {'value': 0.11758183179468357, 'category': 'NLU'} {'value': 35.09699883531247, 'category': 'NLU'} {'value': 37.37259554778931, 'category': 'NLU'} {'value': 75.22840229480128, 'category': 'CFK'} {'value': 42.91722979615231, 'category': 'CFK'} {'value': 69.39999999999999, 'category': 'CFK'} {'value': 54.52101175064728, 'category': 'CFK'}
+ 3 swap-uniba_LLaMAntino-2-13b-hf-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-13b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-13b-hf-ITA</a>" 42.55180887560955 56.134928395066495 60.86087434667943 53.18253720578516 Llama 2 Community License 13.0 0 True πŸ™ˆ πŸ‘ {'value': 56.79723502304148, 'category': 'NLU'} {'value': 60.93495016444478, 'category': 'NLU'} {'value': 38.56655290102389, 'category': 'CFK'} {'value': 52.33333333333333, 'category': 'NLU'} {'value': 24.697986577181208, 'category': 'BFS'} {'value': 57.1976786986929, 'category': 'BFS'} {'value': 54.2447910290625, 'category': 'BFS'} {'value': 68.16391542846057, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 56.51605280366516, 'category': 'NLU'} {'value': 51.571111501558086, 'category': 'NLU'} {'value': 0.16387751408972254, 'category': 'NLU'} {'value': 23.495330157527007, 'category': 'NLU'} {'value': 38.60258050721683, 'category': 'NLU'} {'value': 74.20709928774112, 'category': 'CFK'} {'value': 42.12767769734222, 'category': 'CFK'} {'value': 71.8, 'category': 'CFK'} {'value': 53.97331208922525, 'category': 'CFK'}
+ 2 g8a9_tweety-mistral-7b_bfloat16 bfloat16 fine-tuned πŸ”Ά Delta MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/g8a9/tweety-mistral-7b"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">g8a9/tweety-mistral-7b</a>" 38.661388892098834 54.395439989754266 59.92245878392879 50.99309588859396 Apache 2.0 7.0 0 True πŸ‘ πŸ‘ {'value': 56.17170479302832, 'category': 'NLU'} {'value': 56.423255312264054, 'category': 'NLU'} {'value': 37.96928327645051, 'category': 'CFK'} {'value': 49.666666666666664, 'category': 'NLU'} {'value': 27.91946308724832, 'category': 'BFS'} {'value': 53.70259637851317, 'category': 'BFS'} {'value': 53.57434872305199, 'category': 'BFS'} {'value': 64.41588573083048, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 50.21506876304183, 'category': 'NLU'} {'value': 49.42973129711966, 'category': 'NLU'} {'value': 0.11006633622278786, 'category': 'NLU'} {'value': 18.81035591897043, 'category': 'NLU'} {'value': 28.46426204947685, 'category': 'NLU'} {'value': 64.39794432633592, 'category': 'CFK'} {'value': 37.75548120876122, 'category': 'CFK'} {'value': 73.4, 'category': 'CFK'} {'value': 58.45449113722366, 'category': 'CFK'}
+ 14 meta-llama_Llama-2-7b-hf_bfloat16 bfloat16 pretrained 🟒 Original LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-7b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Llama-2-7b-hf</a>" 37.63391313202453 51.72929042818041 58.70465604622213 49.35595320214236 Llama 2 Community License 7.0 0 True πŸ™ˆ πŸ™ˆ {'value': 50.26836062232489, 'category': 'NLU'} {'value': 57.089775606014214, 'category': 'NLU'} {'value': 35.153583617747444, 'category': 'CFK'} {'value': 36.11111111111111, 'category': 'NLU'} {'value': 25.100671140939596, 'category': 'BFS'} {'value': 49.33536331841416, 'category': 'BFS'} {'value': 51.73318260900284, 'category': 'BFS'} {'value': 67.35406316275402, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 47.63910390674802, 'category': 'NLU'} {'value': 48.347086153434084, 'category': 'NLU'} {'value': 0.036528464070504335, 'category': 'NLU'} {'value': 33.756452251726735, 'category': 'NLU'} {'value': 27.82288694076669, 'category': 'NLU'} {'value': 68.6449557225095, 'category': 'CFK'} {'value': 39.16657442183614, 'category': 'CFK'} {'value': 66.0, 'category': 'CFK'} {'value': 49.681338378809, 'category': 'CFK'}
+ 1 swap-uniba_LLaMAntino-2-7b-hf-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-7b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-7b-hf-ITA</a>" 34.9414685603479 52.13761513263328 55.65104269523598 47.57670879607239 Llama 2 Community License 7.0 0 True πŸ™ˆ πŸ‘ {'value': 51.11111111111111, 'category': 'NLU'} {'value': 53.267951636107355, 'category': 'NLU'} {'value': 33.70307167235495, 'category': 'CFK'} {'value': 34.66666666666667, 'category': 'NLU'} {'value': 24.295302013422816, 'category': 'BFS'} {'value': 45.514286626950536, 'category': 'BFS'} {'value': 47.59019966407009, 'category': 'BFS'} {'value': 60.855425171736485, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 47.55193616643805, 'category': 'NLU'} {'value': 46.04838972288254, 'category': 'NLU'} {'value': 0.043130721156949686, 'category': 'NLU'} {'value': 24.582547279426233, 'category': 'NLU'} {'value': 22.260015178994326, 'category': 'NLU'} {'value': 69.30864535653794, 'category': 'CFK'} {'value': 40.48297086291322, 'category': 'CFK'} {'value': 68.0, 'category': 'CFK'} {'value': 49.193387771360285, 'category': 'CFK'}
+ 7 sapienzanlp_Minerva-3B-base-v1.0_bfloat16 bfloat16 pretrained 🟒 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-3B-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-3B-base-v1.0</a>" 32.51780487861425 45.62270743274333 53.03397360999342 43.72482864045033 Apache 2.0 3.0 0 True πŸ™ˆ πŸ‘ {'value': 49.875480140137604, 'category': 'NLU'} {'value': 52.15633707230505, 'category': 'NLU'} {'value': 30.97269624573379, 'category': 'CFK'} {'value': 24.333333333333336, 'category': 'NLU'} {'value': 23.08724832214765, 'category': 'BFS'} {'value': 48.93622623624203, 'category': 'BFS'} {'value': 45.71528801169143, 'category': 'BFS'} {'value': 47.43110547988597, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 43.13118956315911, 'category': 'NLU'} {'value': 45.77114427860697, 'category': 'NLU'} {'value': -0.015363788820154219, 'category': 'NLU'} {'value': 21.8700732759084, 'category': 'NLU'} {'value': 23.020245154283693, 'category': 'NLU'} {'value': 42.99174436502196, 'category': 'CFK'} {'value': 37.371442699146954, 'category': 'CFK'} {'value': 68.60000000000001, 'category': 'CFK'} {'value': 48.17765385381398, 'category': 'CFK'}
+ 12 swap-uniba_LLaMAntino-2-chat-7b-hf-ITA_bfloat16 bfloat16 fine-tuned πŸ”Ά Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-chat-7b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-chat-7b-hf-ITA</a>" 27.755861293433362 46.472723322751754 50.321561903359665 41.51671550651493 Llama 2 Community License 7.0 0 True πŸ™ˆ πŸ‘ {'value': 47.32809806550469, 'category': 'NLU'} {'value': 43.776841477788466, 'category': 'NLU'} {'value': 29.180887372013654, 'category': 'CFK'} {'value': 28.111111111111107, 'category': 'NLU'} {'value': 23.48993288590604, 'category': 'BFS'} {'value': 41.57668822526659, 'category': 'BFS'} {'value': 41.556830771361305, 'category': 'BFS'} {'value': 44.984357634264406, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 41.716872329343005, 'category': 'NLU'} {'value': 43.53102430893341, 'category': 'NLU'} {'value': -0.02574637563194932, 'category': 'NLU'} {'value': 8.269309204888462, 'category': 'NLU'} {'value': 9.339380225529704, 'category': 'NLU'} {'value': 58.43272201840739, 'category': 'CFK'} {'value': 39.880897484241935, 'category': 'CFK'} {'value': 61.8, 'category': 'CFK'} {'value': 43.06910973909579, 'category': 'CFK'}
+ 4 sapienzanlp_Minerva-1B-base-v1.0_bfloat16 bfloat16 pretrained 🟒 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-1B-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-1B-base-v1.0</a>" 31.262160888066564 35.95468750733228 53.9028524289684 40.37323360812241 Apache 2.0 1.0 0 True πŸ™ˆ πŸ‘ {'value': 50.76172656624852, 'category': 'NLU'} {'value': 53.84641914146224, 'category': 'NLU'} {'value': 24.573378839590443, 'category': 'CFK'} {'value': 22.666666666666664, 'category': 'NLU'} {'value': 26.57718120805369, 'category': 'BFS'} {'value': 48.25128927047713, 'category': 'BFS'} {'value': 44.581537708222804, 'category': 'BFS'} {'value': 50.10425395808837, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 46.49541549308013, 'category': 'NLU'} {'value': 45.46046920890855, 'category': 'NLU'} {'value': 0.022249590030925144, 'category': 'NLU'} {'value': 14.27287574762189, 'category': 'NLU'} {'value': 16.571464690513597, 'category': 'NLU'} {'value': 17.48160254077023, 'category': 'CFK'} {'value': 39.747932356260876, 'category': 'CFK'} {'value': 60.0, 'category': 'CFK'} {'value': 37.970523800039835, 'category': 'CFK'}
+ 16 sapienzanlp_Minerva-350M-base-v1.0_bfloat16 bfloat16 pretrained 🟒 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-350M-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-350M-base-v1.0</a>" 27.112649526241633 32.348128725709046 56.325635671949826 38.59547130796684 Apache 2.0 0.35 0 True πŸ™ˆ πŸ‘ {'value': 45.17543859649123, 'category': 'NLU'} {'value': 35.72145622912868, 'category': 'NLU'} {'value': 24.40273037542662, 'category': 'CFK'} {'value': 22.88888888888889, 'category': 'NLU'} {'value': 52.75167785234899, 'category': 'BFS'} {'value': 41.92832319168979, 'category': 'BFS'} {'value': 40.67042217927179, 'category': 'BFS'} {'value': 46.277755136438564, 'category': 'BFS'} {'value': 100.0, 'category': 'BFS'} {'value': 36.23277134884009, 'category': 'NLU'} {'value': 43.223117993157416, 'category': 'NLU'} {'value': -0.036868413829916326, 'category': 'NLU'} {'value': 10.308018221966565, 'category': 'NLU'} {'value': 23.388373345290127, 'category': 'NLU'} {'value': 4.903980027793706, 'category': 'CFK'} {'value': 43.7486912416563, 'category': 'CFK'} {'value': 56.599999999999994, 'category': 'CFK'} {'value': 32.085241983668595, 'category': 'CFK'}
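Each per-task cell in the regenerated TSV is now serialized as a Python dict literal that carries the task's category alongside its score. If a cell has to be read back outside the app, something like `ast.literal_eval` recovers it; the cell string below is copied from the first data row above, the rest is illustrative:

```python
# Illustrative only: recover one dict-encoded cell from latest_results.tsv.
import ast

cell = "{'value': 49.61934617107031, 'category': 'NLU'}"
score = ast.literal_eval(cell)  # -> {'value': 49.61934617107031, 'category': 'NLU'}
print(score["category"], round(score["value"], 2))  # NLU 49.62
```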
src/about.py CHANGED
@@ -7,6 +7,7 @@ class Task:
     benchmark: str
     metric: str
     col_name: str
+    category: str
     higher_is_better: bool = True
     scale_by_100: bool = True

@@ -15,23 +16,24 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task1 = Task("ami_2020_aggressiveness", "f1,none", "AMI 2020 Agg")
-    task2 = Task("ami_2020_misogyny", "f1,none", "AMI 2020 Miso")
-    task0 = Task("arc_challenge_ita", "acc_norm,none", "ARC-C")
-    task4 = Task("belebele_ita", "acc_norm,none", "Belebele")
-    task3 = Task("gente_rephrasing", "acc,none", "GeNTE Neutralizing")
-    task12 = Task("haspeede2_hs", "f1,none", "HaSpeeDe2 HS")
-    task13 = Task("haspeede2_stereo", "f1,none", "HaSpeeDe2 Stereo")
-    task5 = Task("hatecheck_ita", "f1,none", "HateCheck")
-    task6 = Task("honest_ita", "acc,none", "HONEST", higher_is_better=False)
-    task14 = Task("ironita_irony", "f1,none", "IronITA Irony")
-    task15 = Task("ironita_sarcasm", "f1,none", "IronITA Sarcasm")
-    task7 = Task("itacola", "mcc,none", "ItaCoLA", scale_by_100=False)
-    task8 = Task("news_sum", "bertscore,none", "News Sum")
-    task16 = Task("sentipolc", "f1,none", "SENTIPOLC")
-    task9 = Task("squad_it", "squad_f1,get-answer", "SQuAD it")
-    task10 = Task("truthfulqa_mc2_ita", "acc,none", "TruthfulQA")
-    task11 = Task("xcopa_it", "acc,none", "XCOPA")
+    task1 = Task("ami_2020_aggressiveness", "f1,none", "AMI 2020 Agg", "NLU")
+    task2 = Task("ami_2020_misogyny", "f1,none", "AMI 2020 Miso", "NLU")
+    task0 = Task("arc_challenge_ita", "acc_norm,none", "ARC-C", "CFK")
+    task4 = Task("belebele_ita", "acc_norm,none", "Belebele", "NLU")
+    task3 = Task("gente_rephrasing", "acc,none", "GeNTE Neutralizing", "BFS")
+    task12 = Task("haspeede2_hs", "f1,none", "HaSpeeDe2 HS", "BFS")
+    task13 = Task("haspeede2_stereo", "f1,none", "HaSpeeDe2 Stereo", "BFS")
+    task5 = Task("hatecheck_ita", "f1,none", "HateCheck", "BFS")
+    task6 = Task("honest_ita", "acc,none", "HONEST", "BFS", higher_is_better=False)
+    task14 = Task("ironita_irony", "f1,none", "IronITA Irony", "NLU")
+    task15 = Task("ironita_sarcasm", "f1,none", "IronITA Sarcasm", "NLU")
+    task7 = Task("itacola", "mcc,none", "ItaCoLA", "NLU", scale_by_100=False)
+    task8 = Task("news_sum", "bertscore,none", "News Sum", "NLU")
+    task16 = Task("sentipolc", "f1,none", "SENTIPOLC", "NLU")
+    task9 = Task("squad_it", "squad_f1,get-answer", "SQuAD it", "CFK")
+    task10 = Task("truthfulqa_mc2_ita", "acc,none", "TruthfulQA", "CFK")
+    task11 = Task("xcopa_it", "acc,none", "XCOPA", "CFK")
+    task17 = Task("hellaswag_ita", "acc_norm,none", "Hellaswag-it", "CFK")


 NUM_FEWSHOT = 0 # Change with your few shot
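For reference, a self-contained sketch of the extended Task shape (assuming `Task` is the dataclass it appears to be in src/about.py): every task now carries a category label, and that label is what the new per-category averages group on:

```python
# Sketch, not the full src/about.py: a trimmed Task/Tasks pair showing the new
# category field. The three entries are copied from the enum above.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str
    category: str
    higher_is_better: bool = True
    scale_by_100: bool = True


class Tasks(Enum):
    task0 = Task("arc_challenge_ita", "acc_norm,none", "ARC-C", "CFK")
    task6 = Task("honest_ita", "acc,none", "HONEST", "BFS", higher_is_better=False)
    task17 = Task("hellaswag_ita", "acc_norm,none", "Hellaswag-it", "CFK")


# Group tasks by the new category label.
cfk_tasks = [t.value.col_name for t in Tasks if t.value.category == "CFK"]
print(cfk_tasks)  # ['ARC-C', 'Hellaswag-it']
```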
src/display/utils.py CHANGED
@@ -32,7 +32,10 @@ auto_eval_column_dict.append(["training_codebase", ColumnContent, ColumnContent(
 auto_eval_column_dict.append(["training_data", ColumnContent, ColumnContent("Data", "str", True, False)])

 # Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg ⬆️", "number", True)])
+auto_eval_column_dict.append(["average_NLU", ColumnContent, ColumnContent("Avg NLU", "number", True)])
+auto_eval_column_dict.append(["average_CFK", ColumnContent, ColumnContent("Avg CFK", "number", True)])
+auto_eval_column_dict.append(["average_BFS", ColumnContent, ColumnContent("Avg BFS", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])

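A hedged sketch of what these appended entries become, assuming the leaderboard template's usual `ColumnContent`/`make_dataclass` plumbing (the field names below are inferred from the positional calls above, not confirmed by this diff):

```python
# Assumed shape of ColumnContent; the real definition lives elsewhere in
# src/display/utils.py and may differ.
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


auto_eval_column_dict = []
for key, label in [
    ("average", "Avg ⬆️"),
    ("average_NLU", "Avg NLU"),
    ("average_CFK", "Avg CFK"),
    ("average_BFS", "Avg BFS"),
]:
    auto_eval_column_dict.append([key, ColumnContent, ColumnContent(label, "number", True)])

# The list of (attribute, type, default) triples becomes the AutoEvalColumn
# registry used throughout the app.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.average_NLU.name)  # Avg NLU
```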
src/leaderboard/read_evals.py CHANGED
@@ -104,7 +104,7 @@ class EvalResult:
             if task.scale_by_100:
                 mean_acc *= 100.0

-            results[task.benchmark] = mean_acc
+            results[task.benchmark] = {"value": mean_acc, "category": task.category}

         # pdb.set_trace()

@@ -141,7 +141,17 @@

     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+
+        # compute one average score per category
+        def _get_score_category(category):
+            filtered_scores = [v["value"] for _, v in self.results.items() if v["category"] == category]
+            return sum(filtered_scores) / len(filtered_scores)
+
+        average_NLU = _get_score_category("NLU")
+        average_CFK = _get_score_category("CFK")
+        average_BFS = _get_score_category("BFS")
+        average = (average_NLU + average_CFK + average_BFS) / 3
+
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -150,6 +160,9 @@
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumn.average_NLU.name: average_NLU,
+            AutoEvalColumn.average_CFK.name: average_CFK,
+            AutoEvalColumn.average_BFS.name: average_BFS,
             AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.params.name: self.num_params,
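Restated as a standalone snippet with illustrative inputs, the averaging introduced above first takes a mean inside each category and then takes the unweighted mean of the three category means, which becomes the headline `Avg ⬆️` column:

```python
# Illustrative inputs; the real values come from EvalResult.results.
results = {
    "arc_challenge_ita": {"value": 56.5, "category": "CFK"},
    "xcopa_it": {"value": 73.4, "category": "CFK"},
    "ami_2020_misogyny": {"value": 73.6, "category": "NLU"},
    "hatecheck_ita": {"value": 80.5, "category": "BFS"},
}


def get_score_category(category):
    # Mean of all task scores tagged with this category.
    scores = [v["value"] for v in results.values() if v["category"] == category]
    return sum(scores) / len(scores)


average_NLU = get_score_category("NLU")
average_CFK = get_score_category("CFK")
average_BFS = get_score_category("BFS")
average = (average_NLU + average_CFK + average_BFS) / 3
print(round(average, 2))  # 73.02
```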