Menouar leaderboard-pr-bot committed
Commit 51fddae
1 Parent(s): fa9cda4

Adding Evaluation Results (#1)


- Adding Evaluation Results (2f8cd6a4cf539ca037f50e16748799c0f046a3b1)


Co-authored-by: Open LLM Leaderboard PR Bot <[email protected]>

Files changed (1)

README.md  +64 -47
README.md CHANGED

@@ -1,4 +1,6 @@
 ---
+language:
+- en
 license: mit
 library_name: peft
 tags:
@@ -6,12 +8,14 @@ tags:
 - sft
 - generated_from_trainer
 - pytorch
+datasets:
+- gsm8k
 base_model: microsoft/phi-2
+pipeline_tag: text-generation
 model-index:
 - name: phi-2-basic-maths
   results:
-  # AI2 Reasoning Challenge (25-Shot)
-  - task:
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -22,15 +26,13 @@ model-index:
       args:
         num_few_shot: 25
     metrics:
-    - type: acc_norm
-      name: normalized accuracy
-      value: 55.80
+    - type: acc_norm
+      value: 55.8
+      name: normalized accuracy
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
-
-  # HellaSwag (10-shot)
-  - task:
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -40,15 +42,13 @@ model-index:
       args:
         num_few_shot: 10
     metrics:
-    - type: acc_norm
-      name: normalized accuracy
-      value: 71.15
+    - type: acc_norm
+      value: 71.15
+      name: normalized accuracy
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
-
-  # MMLU (5-Shot)
-  - task:
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -59,15 +59,13 @@ model-index:
       args:
         num_few_shot: 5
     metrics:
-    - type: acc
-      name: accuracy
-      value: 47.27
+    - type: acc
+      value: 47.27
+      name: accuracy
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
-
-  # Winogrande (5-shot)
-  - task:
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -78,15 +76,13 @@ model-index:
       args:
         num_few_shot: 5
     metrics:
-    - type: acc
-      name: accuracy
-      value: 75.3
+    - type: acc
+      value: 75.3
+      name: accuracy
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
-
-  # truthfulqa (0-shot)
-  - task:
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -97,15 +93,13 @@ model-index:
       args:
         num_few_shot: 0
     metrics:
-    - type: mc2
-      name: mc2
-      value: 41.40
+    - type: mc2
+      value: 41.4
+      name: mc2
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
-
-  # GSM8k (5-shot)
-  - task:
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -116,18 +110,28 @@ model-index:
       args:
         num_few_shot: 5
     metrics:
-    - type: acc
-      name: accuracy
-      value: 30.70
+    - type: acc
+      value: 30.7
+      name: accuracy
     source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
       name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: TruthfulQA (0-shot)
+      type: truthful_qa
+      config: multiple_choice
+      split: validation
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: mc2
+      value: 41.4
+    source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Menouar/phi-2-basic-maths
-
-datasets:
-- gsm8k
-language:
-- en
-pipeline_tag: text-generation
+      name: Open LLM Leaderboard
 ---
 
 # phi-2-basic-maths
@@ -196,4 +200,17 @@ Unclear answers: 7.81%
 - Transformers 4.38.0.dev0
 - Pytorch 2.1.0+cu121
 - Datasets 2.16.1
-- Tokenizers 0.15.1
+- Tokenizers 0.15.1
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Menouar__phi-2-basic-maths)
+
+|             Metric              |Value|
+|---------------------------------|----:|
+|Avg.                             |53.60|
+|AI2 Reasoning Challenge (25-Shot)|55.80|
+|HellaSwag (10-Shot)              |71.15|
+|MMLU (5-Shot)                    |47.27|
+|TruthfulQA (0-shot)              |41.40|
+|Winogrande (5-shot)              |75.30|
+|GSM8k (5-shot)                   |30.71|
+
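For anyone consuming the card programmatically, the `model-index` block and the markdown table added above carry the same numbers in two forms. A minimal cross-check sketch (not part of this commit; it assumes only the scores quoted in the table and the public `huggingface_hub` model-card API) could look like this:

```python
# Minimal sketch: re-derive the "Avg." row from the six benchmark scores in the
# table above, then read the structured eval results that this commit adds to
# the card's model-index metadata.
from huggingface_hub import ModelCard

scores = {
    "AI2 Reasoning Challenge (25-Shot)": 55.80,
    "HellaSwag (10-Shot)": 71.15,
    "MMLU (5-Shot)": 47.27,
    "TruthfulQA (0-shot)": 41.40,
    "Winogrande (5-shot)": 75.30,
    "GSM8k (5-shot)": 30.71,
}
average = sum(scores.values()) / len(scores)
print(f"Avg. = {average:.2f}")  # ~53.60, matching the table up to rounding

# Load the README from the Hub and iterate over its parsed model-index entries.
card = ModelCard.load("Menouar/phi-2-basic-maths")
for result in card.data.eval_results or []:
    print(result.dataset_name, result.metric_type, result.metric_value)
```

Note that `ModelCard.load` reads the card from the repository's default branch, so it will only reflect these results once this PR is merged.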