Upload ./Qwen2-7B-Q4_K_M.mmlu.pro.txt with huggingface_hub
Browse files- Qwen2-7B-Q4_K_M.mmlu.pro.txt +2 -80
Qwen2-7B-Q4_K_M.mmlu.pro.txt
CHANGED
@@ -1,80 +1,2 @@
|
|
1 |
-
multiple_choice_score: there are
|
2 |
-
multiple_choice_score: reading
|
3 |
-
multiple_choice_score: preparing task data......................................................................done
|
4 |
-
multiple_choice_score : calculating TruthfulQA score over 70 tasks.
|
5 |
-
|
6 |
-
task acc_norm
|
7 |
-
1 0.00000000
|
8 |
-
2 50.00000000
|
9 |
-
3 33.33333333
|
10 |
-
4 25.00000000
|
11 |
-
5 40.00000000
|
12 |
-
6 33.33333333
|
13 |
-
7 28.57142857
|
14 |
-
8 25.00000000
|
15 |
-
9 22.22222222
|
16 |
-
10 30.00000000
|
17 |
-
11 27.27272727
|
18 |
-
12 33.33333333
|
19 |
-
13 30.76923077
|
20 |
-
14 35.71428571
|
21 |
-
15 40.00000000
|
22 |
-
16 37.50000000
|
23 |
-
17 35.29411765
|
24 |
-
18 33.33333333
|
25 |
-
19 31.57894737
|
26 |
-
20 30.00000000
|
27 |
-
21 28.57142857
|
28 |
-
22 27.27272727
|
29 |
-
23 26.08695652
|
30 |
-
24 25.00000000
|
31 |
-
25 24.00000000
|
32 |
-
26 23.07692308
|
33 |
-
27 22.22222222
|
34 |
-
28 21.42857143
|
35 |
-
29 20.68965517
|
36 |
-
30 20.00000000
|
37 |
-
31 19.35483871
|
38 |
-
32 18.75000000
|
39 |
-
33 21.21212121
|
40 |
-
34 20.58823529
|
41 |
-
35 20.00000000
|
42 |
-
36 19.44444444
|
43 |
-
37 18.91891892
|
44 |
-
38 18.42105263
|
45 |
-
39 17.94871795
|
46 |
-
40 20.00000000
|
47 |
-
41 19.51219512
|
48 |
-
42 19.04761905
|
49 |
-
43 18.60465116
|
50 |
-
44 18.18181818
|
51 |
-
45 17.77777778
|
52 |
-
46 19.56521739
|
53 |
-
47 19.14893617
|
54 |
-
48 18.75000000
|
55 |
-
49 18.36734694
|
56 |
-
50 18.00000000
|
57 |
-
51 17.64705882
|
58 |
-
52 17.30769231
|
59 |
-
53 18.86792453
|
60 |
-
54 18.51851852
|
61 |
-
55 20.00000000
|
62 |
-
56 19.64285714
|
63 |
-
57 19.29824561
|
64 |
-
58 18.96551724
|
65 |
-
59 18.64406780
|
66 |
-
60 20.00000000
|
67 |
-
61 19.67213115
|
68 |
-
62 19.35483871
|
69 |
-
63 19.04761905
|
70 |
-
64 20.31250000
|
71 |
-
65 20.00000000
|
72 |
-
66 19.69696970
|
73 |
-
67 19.40298507
|
74 |
-
68 19.11764706
|
75 |
-
69 18.84057971
|
76 |
-
70 18.57142857
|
77 |
-
|
78 |
-
Final result: 18.5714 +/- 4.6815
|
79 |
-
Random chance: 10.0000 +/- 3.6116
|
80 |
-
|
|
|
1 |
+
multiple_choice_score: there are 12032 tasks in prompt
|
2 |
+
multiple_choice_score: reading tasksmultiple_choice_score: failed to read task 1 of 12032
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|