Adding Evaluation Results

#3
by leaderboard-pr-bot - opened
Files changed (1)
  1. README.md +50 -25
README.md CHANGED
@@ -1,7 +1,19 @@
 ---
+language:
+- en
 license: llama3.2
+library_name: transformers
 base_model:
 - meta-llama/Llama-3.2-1B-Instruct
+datasets:
+- argilla/OpenHermesPreferences
+- argilla/magpie-ultra-v0.1
+- argilla/Capybara-Preferences-Filtered
+- mlabonne/open-perfectblend
+- HuggingFaceTB/everyday-conversations-llama3.1-2k
+- WizardLMTeam/WizardLM_evol_instruct_V2_196k
+- ProlificAI/social-reasoning-rlhf
+pipeline_tag: text-generation
 model-index:
 - name: Llama-3.2-SUN-2.4B-v1.0.0
   results:
@@ -17,9 +29,11 @@ model-index:
     - type: inst_level_strict_acc and prompt_level_strict_acc
       value: 56.37
       name: strict accuracy
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 55.37
+      name: strict accuracy
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -33,9 +47,11 @@ model-index:
     - type: acc_norm
      value: 7.21
       name: normalized accuracy
+    - type: acc_norm
+      value: 7.17
+      name: normalized accuracy
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -49,9 +65,11 @@ model-index:
     - type: exact_match
       value: 4.83
       name: exact match
+    - type: exact_match
+      value: 1.28
+      name: exact match
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -65,9 +83,11 @@ model-index:
     - type: acc_norm
       value: 1.01
       name: acc_norm
+    - type: acc_norm
+      value: 0.45
+      name: acc_norm
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -81,9 +101,11 @@ model-index:
     - type: acc_norm
       value: 3.02
       name: acc_norm
+    - type: acc_norm
+      value: 0.13
+      name: acc_norm
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
       name: Open LLM Leaderboard
   - task:
       type: text-generation
@@ -99,22 +121,12 @@ model-index:
     - type: acc
       value: 6.03
       name: accuracy
+    - type: acc
+      value: 7.17
+      name: accuracy
     source:
-      url: >-
-        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
       name: Open LLM Leaderboard
-datasets:
-- argilla/OpenHermesPreferences
-- argilla/magpie-ultra-v0.1
-- argilla/Capybara-Preferences-Filtered
-- mlabonne/open-perfectblend
-- HuggingFaceTB/everyday-conversations-llama3.1-2k
-- WizardLMTeam/WizardLM_evol_instruct_V2_196k
-- ProlificAI/social-reasoning-rlhf
-language:
-- en
-pipeline_tag: text-generation
-library_name: transformers
 ---
 
 # MedIT SUN 2.5B
@@ -165,4 +177,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
 |MATH Lvl 5 (4-Shot)| 4.83|
 |GPQA (0-shot) | 1.01|
 |MuSR (0-shot) | 3.02|
-|MMLU-PRO (5-shot) | 6.03|
+|MMLU-PRO (5-shot) | 6.03|
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_meditsolutions__Llama-3.2-SUN-2.5B-chat)
+
+| Metric |Value|
+|-------------------|----:|
+|Avg. |11.93|
+|IFEval (0-Shot) |55.37|
+|BBH (3-Shot) | 7.17|
+|MATH Lvl 5 (4-Shot)| 1.28|
+|GPQA (0-shot) | 0.45|
+|MuSR (0-shot) | 0.13|
+|MMLU-PRO (5-shot) | 7.17|
+
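
For reviewers, once a revision like this is merged the `model-index` block it extends can be read back programmatically with `huggingface_hub` instead of scraping the rendered card. The snippet below is a minimal sketch, not part of this PR; the repo id is an assumption taken from the leaderboard query string in the card (the README heading says "MedIT SUN 2.5B", so the actual repository name may differ).

```python
# Minimal sketch (not part of this PR): read back the model-index entries after merge.
# Assumption: the repo id below comes from the leaderboard query string in the card;
# replace it with the repository this PR actually targets.
from huggingface_hub import ModelCard

card = ModelCard.load("meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0")

# ModelCardData parses the `model-index` YAML into EvalResult objects.
for result in card.data.eval_results or []:
    print(f"{result.dataset_name}: {result.metric_name} = {result.metric_value}")
```

Because the PR keeps the pre-existing metric entries alongside the new ones, a loop like this would print both values for each benchmark (e.g. 56.37 and 55.37 for IFEval strict accuracy).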