sequelbox committed on
Commit
aac97f0
1 Parent(s): 13f058d
Files changed (1) hide show
  1. README.md +95 -0
README.md CHANGED
@@ -31,6 +31,101 @@ datasets:
31
  - sequelbox/Celestia
32
  - sequelbox/Supernova
33
  model_type: llama
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  license: llama3.1
35
  ---
36
 
 
31
  - sequelbox/Celestia
32
  - sequelbox/Supernova
33
  model_type: llama
34
+ model-index:
35
+ - name: Llama3.1-8B-ShiningValiant2
36
+ results:
37
+ - task:
38
+ type: text-generation
39
+ name: Text Generation
40
+ dataset:
41
+ name: IFEval (0-Shot)
42
+ type: HuggingFaceH4/ifeval
43
+ args:
44
+ num_few_shot: 0
45
+ metrics:
46
+ - type: inst_level_strict_acc and prompt_level_strict_acc
47
+ value: 65.24
48
+ name: strict accuracy
49
+ source:
50
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
51
+ name: Open LLM Leaderboard
52
+ - task:
53
+ type: text-generation
54
+ name: Text Generation
55
+ dataset:
56
+ name: BBH (3-Shot)
57
+ type: BBH
58
+ args:
59
+ num_few_shot: 3
60
+ metrics:
61
+ - type: acc_norm
62
+ value: 26.35
63
+ name: normalized accuracy
64
+ source:
65
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
66
+ name: Open LLM Leaderboard
67
+ - task:
68
+ type: text-generation
69
+ name: Text Generation
70
+ dataset:
71
+ name: MATH Lvl 5 (4-Shot)
72
+ type: hendrycks/competition_math
73
+ args:
74
+ num_few_shot: 4
75
+ metrics:
76
+ - type: exact_match
77
+ value: 11.63
78
+ name: exact match
79
+ source:
80
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
81
+ name: Open LLM Leaderboard
82
+ - task:
83
+ type: text-generation
84
+ name: Text Generation
85
+ dataset:
86
+ name: GPQA (0-shot)
87
+ type: Idavidrein/gpqa
88
+ args:
89
+ num_few_shot: 0
90
+ metrics:
91
+ - type: acc_norm
92
+ value: 8.95
93
+ name: acc_norm
94
+ source:
95
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
96
+ name: Open LLM Leaderboard
97
+ - task:
98
+ type: text-generation
99
+ name: Text Generation
100
+ dataset:
101
+ name: MuSR (0-shot)
102
+ type: TAUR-Lab/MuSR
103
+ args:
104
+ num_few_shot: 0
105
+ metrics:
106
+ - type: acc_norm
107
+ value: 7.19
108
+ name: acc_norm
109
+ source:
110
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
111
+ name: Open LLM Leaderboard
112
+ - task:
113
+ type: text-generation
114
+ name: Text Generation
115
+ dataset:
116
+ name: MMLU-PRO (5-shot)
117
+ type: TIGER-Lab/MMLU-Pro
118
+ config: main
119
+ split: test
120
+ args:
121
+ num_few_shot: 5
122
+ metrics:
123
+ - type: acc
124
+ value: 26.38
125
+ name: accuracy
126
+ source:
127
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-ShiningValiant2
128
+ name: Open LLM Leaderboard
129
  license: llama3.1
130
  ---
131