g8a9 committed
Commit e150b93 • 1 Parent(s): e3d6a90

save a TSV

Files changed (2)
  1. latest_results.tsv +18 -0
  2. src/populate.py +4 -0
latest_results.tsv ADDED
@@ -0,0 +1,18 @@
+ eval_name Precision Type T Weight type Architecture Model Average ⬆️ Hub License #Params (B) Model sha Hub ❤️ Available on the hub Code Data AMI 2020 Agg AMI 2020 Miso ARC-C Belebele GeNTE Neutralizing HaSpeeDe2 HS HaSpeeDe2 Stereo HateCheck HONEST IronITA Irony IronITA Sarcasm ItaCoLA News Sum SENTIPOLC SQuAD it TruthfulQA XCOPA
+ 6 meta-llama_Meta-Llama-3-8B-Instruct_bfloat16 bfloat16 fine-tuned 🔶 Delta LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Meta-Llama-3-8B-Instruct</a>" 58.748316329193344 Meta Llama 3 Community License 8.0 0 True 🙈 🙈 55.37407439022941 71.59839304531086 42.57679180887372 82.0 32.48322147651007 70.53457622533335 63.09031737569537 81.04353954390334 100.0 68.90825671526659 50.63388859343638 0.2575796842123843 35.87793977181792 44.40535171743039 76.4493013414765 51.688145906790595 71.8
+ 5 mistralai_Mistral-7B-Instruct-v0.2_bfloat16 bfloat16 fine-tuned 🔶 Delta MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai/Mistral-7B-Instruct-v0.2</a>" 57.34121935588332 Apache 2.0 7.0 0 True 🙈 🙈 59.26344649551212 67.03848859411114 44.36860068259386 67.55555555555556 29.12751677852349 70.94842426874283 66.92711073442074 77.91591984780963 100.0 60.340552982611825 52.5864148320762 0.2650337064892725 36.39365330456299 50.86004322897759 67.76589485305061 59.24407318497844 64.2
+ 7 meta-llama_Meta-Llama-3-8B_bfloat16 bfloat16 pretrained 🟢 Original LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Meta-Llama-3-8B</a>" 56.06703915537942 Meta Llama 3 Community License 8.0 0 True 🙈 🙈 59.167006941608825 65.2988113338495 40.44368600682594 75.88888888888889 29.664429530201343 66.34318803515383 59.665954331496216 80.46901075930542 100.0 55.417040602648825 56.72119925007975 0.27369249994767686 32.8415569535643 41.65027333775969 76.0261495015472 42.068777668572736 71.2
+ 11 mii-community_zefiro-7b-dpo-ITA_bfloat16 bfloat16 fine-tuned 🔶 Adapter MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-dpo-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-dpo-ITA</a>" 55.965402990833326 Apache 2.0 7.0 0 True 🙈 👍 58.82330921555731 65.29219074291716 44.19795221843004 66.11111111111111 29.395973154362416 66.42034413085725 62.04374417717792 82.92405607588724 100.0 59.58686440677966 54.61088096497907 0.15622781170005148 35.73603929970904 40.115316478607326 74.25556784297711 43.342273213113806 68.4
+ 12 mii-community_zefiro-7b-sft-ITA_bfloat16 bfloat16 fine-tuned 🔶 Adapter MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-sft-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-sft-ITA</a>" 55.49594502634554 Apache 2.0 7.0 0 True 🙈 👍 59.05979939301703 65.1057440915327 42.491467576791806 68.11111111111111 26.845637583892618 66.26712374430319 62.8192509112937 82.66496052951742 100.0 52.30611640858258 51.83751520046043 0.1357069141230042 34.79253286178762 46.95941666858784 74.51464966490876 42.52003278796419 67.0
+ 10 mii-community_zefiro-7b-base-ITA_bfloat16 bfloat16 fine-tuned 🔶 Delta MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mii-community/zefiro-7b-base-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mii-community/zefiro-7b-base-ITA</a>" 55.00699465875708 Apache 2.0 7.0 0 True 🙈 👍 58.26528760660498 64.28826512391971 41.04095563139932 58.77777777777777 27.651006711409398 63.41040091554036 60.20187319698322 83.36773972540995 100.0 59.61581980369971 57.22956187895212 0.21630746589700614 34.14146108746794 38.60348969137316 75.51969438076942 46.18926820166605 66.60000000000001
+ 4 mistralai_Mistral-7B-v0.1_bfloat16 bfloat16 pretrained 🟢 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai/Mistral-7B-v0.1</a>" 54.53886941414389 Apache 2.0 7.0 0 True 🙈 🙈 57.328824884373255 65.894796072133 41.12627986348123 65.55555555555556 29.395973154362416 60.74292449685459 58.40138983607699 81.20893551611952 100.0 55.21599398531273 56.0842910054169 0.21650562273812077 33.95578203972551 38.248077168561004 74.9929389324236 43.192511907311555 65.60000000000001
+ 13 swap-uniba_LLaMAntino-2-chat-13b-hf-ITA_bfloat16 bfloat16 fine-tuned 🔶 Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-chat-13b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-chat-13b-hf-ITA</a>" 54.11970329791716 Llama 2 Community License 13.0 0 True 🙈 👍 61.10842468417231 65.37114603439397 39.16382252559727 60.22222222222222 25.369127516778523 69.19701593869706 58.47240303675274 81.91924285348375 100.0 60.50989600805099 52.82407691311843 0.14705407414144434 23.961182038838874 33.936431374370564 72.99623578596571 44.43667505800782 70.39999999999999
+ 9 meta-llama_Llama-2-13b-hf_bfloat16 bfloat16 pretrained 🟢 Original LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-13b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Llama-2-13b-hf</a>" 51.53259991565551 Llama 2 Community License 13.0 0 True 🙈 🙈 55.5211353099392 59.73745072519405 39.67576791808874 49.77777777777778 24.295302013422816 56.705263521819575 55.58451703385505 75.35374357525852 100.0 49.6392951529161 51.32659342493766 0.15611794645515564 34.99992804182015 35.591589638147205 75.37248669035945 42.917229796152284 69.39999999999999
+ 15 swap-uniba_LLaMAntino-2-13b-hf-ITA_bfloat16 bfloat16 fine-tuned 🔶 Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-13b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-13b-hf-ITA</a>" 51.398892791161465 Llama 2 Community License 13.0 0 True 🙈 👍 56.91493042765838 60.79803569083185 38.395904436860064 52.22222222222223 24.563758389261743 59.591680814940574 53.72166074176572 68.63908831908832 100.0 53.8835564536499 55.220925077582386 0.24355772539252643 23.46778181911886 37.868993755237724 74.32140387879224 42.12767769734223 71.8
+ 3 g8a9_tweety-mistral-7b_bfloat16 bfloat16 fine-tuned 🔶 Delta MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/g8a9/tweety-mistral-7b"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">g8a9/tweety-mistral-7b</a>" 48.30841019247476 Apache 2.0 7.0 0 True 👍 👍 51.45449792748049 56.83712780075503 38.310580204778155 49.77777777777778 26.308724832214764 56.756734367216744 54.259763500716296 64.359704127708 100.0 48.96104026840812 49.87333014539054 0.12625704978630167 18.72596344839197 30.051768572855263 64.28422203983018 37.75548120876116 73.4
+ 8 meta-llama_Llama-2-7b-hf_bfloat16 bfloat16 pretrained 🟢 Original LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-7b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama/Llama-2-7b-hf</a>" 47.88766168947672 Llama 2 Community License 7.0 0 True 🙈 🙈 50.167656275074535 58.36785332162082 34.8976109215017 36.0 24.832214765100673 51.08771929824562 54.388067109409945 68.27095354111434 100.0 47.98695094164673 52.28499188648629 0.1232306318769991 33.83386905556545 28.13533353128773 68.54722302033736 39.16657442183617 66.0
+ 14 swap-uniba_LLaMAntino-2-7b-hf-ITA_bfloat16 bfloat16 fine-tuned 🔶 Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-7b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-7b-hf-ITA</a>" 45.77323088190624 Llama 2 Community License 7.0 0 True 🙈 👍 50.55555555555556 53.96398030216369 33.532423208191126 35.0 24.295302013422816 45.45927084511112 48.916213374427244 63.034868799504395 100.0 49.374306621370714 47.508286764686886 0.12030506441959186 24.681836570629475 24.100219820665426 69.11938518934485 40.482970862913184 68.0
+ 0 sapienzanlp_Minerva-3B-base-v1.0_bfloat16 bfloat16 pretrained 🟢 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-3B-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-3B-base-v1.0</a>" 41.83701792171881 Apache 2.0 3.0 0 True 🙈 👍 49.23359098650596 52.79773093447906 30.97269624573379 24.333333333333336 23.221476510067113 48.934170047390545 45.62087699981554 48.50152328821496 100.0 45.47176216254846 46.937293275884066 -0.033345460872866474 22.064438703049753 23.965207913141235 43.23710703078177 37.371442699147025 68.60000000000001
+ 16 swap-uniba_LLaMAntino-2-chat-7b-hf-ITA_bfloat16 bfloat16 fine-tuned 🔶 Adapter LlamaForCausalLM "<a target=""_blank"" href=""https://huggingface.co/swap-uniba/LLaMAntino-2-chat-7b-hf-ITA"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">swap-uniba/LLaMAntino-2-chat-7b-hf-ITA</a>" 39.41202334600491 Llama 2 Community License 7.0 0 True 🙈 👍 46.2034115499462 45.34461746324341 29.266211604095567 28.111111111111107 23.758389261744966 42.88181951386289 42.392736217028414 46.58756852047553 100.0 41.699322128331325 45.99082660952828 0.006128977707520721 8.108838055814852 9.097213196911147 58.875305188040464 39.880897484241906 61.8
+ 2 sapienzanlp_Minerva-1B-base-v1.0_bfloat16 bfloat16 pretrained 🟢 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-1B-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-1B-base-v1.0</a>" 38.91315019063004 Apache 2.0 1.0 0 True 🙈 👍 48.12085869829324 54.850361197110416 24.573378839590443 22.666666666666664 26.44295302013423 49.56106111987823 46.22580429357212 49.08730795600027 100.0 45.20836949340911 47.013888888888886 0.040313621284920456 14.386315956732856 16.24451875278343 17.353822380105154 39.74793235626088 60.0
+ 1 sapienzanlp_Minerva-350M-base-v1.0_bfloat16 bfloat16 pretrained 🟢 Original MistralForCausalLM "<a target=""_blank"" href=""https://huggingface.co/sapienzanlp/Minerva-350M-base-v1.0"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">sapienzanlp/Minerva-350M-base-v1.0</a>" 37.29084548916654 Apache 2.0 0.35 0 True 🙈 👍 45.17543859649123 37.91598801552352 24.40273037542662 22.88888888888889 53.8255033557047 42.03399318323408 40.00324919625145 46.79714365710485 100.0 38.049817139468225 44.255424938736375 -0.01382899490742639 10.341357559414417 22.94165519039672 4.978320972441255 43.74869124165633 56.599999999999994
src/populate.py CHANGED
@@ -15,6 +15,10 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
 
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+
+    if df.shape[0]:
+        df.to_csv("latest_results.tsv", sep="\t")
+
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
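The added block snapshots the full leaderboard DataFrame to latest_results.tsv (tab-separated, unrounded scores, written before the column filtering and rounding below), and the `if df.shape[0]:` guard skips the write when no results have been collected. As a minimal sketch of reading that snapshot back, assuming only pandas and the default index that `to_csv` emits:

import pandas as pd

# Load the snapshot written by get_leaderboard_df; the first column is the
# default DataFrame index (assumption: to_csv was not called with index=False).
df = pd.read_csv("latest_results.tsv", sep="\t", index_col=0)

# Rows were sorted by average score (descending) before saving, so head()
# shows the current top of the leaderboard.
print(df[["eval_name", "Average ⬆️"]].head())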