Update README.md
Browse files
README.md
CHANGED
@@ -99,6 +99,35 @@ In LM-Studio, simply select the ChatML Prefix on the settings side pane:
|
|
99 |
|
100 |
Average: 0.4399
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
## GPT4All
|
103 |
|
104 |
```
|
|
|
99 |
|
100 |
Average: 0.4399
|
101 |
|
102 |
+
## BigBench Hard
|
103 |
+
|
104 |
+
```
|
105 |
+
hf-causal-experimental (pretrained=openaccess-ai-collective/dpopenhermes-alpha-v1,dtype=bfloat16,trust_remote_code=True,use_accelerate=True), limit: None, provide_description: False, num_fewshot: 0, batch_size: 16
|
106 |
+
| Task |Version| Metric |Value | |Stderr|
|
107 |
+
|------------------------------------------------|------:|---------------------|-----:|---|-----:|
|
108 |
+
|bigbench_causal_judgement | 0|multiple_choice_grade|0.5632|± |0.0361|
|
109 |
+
|bigbench_date_understanding | 0|multiple_choice_grade|0.6612|± |0.0247|
|
110 |
+
|bigbench_disambiguation_qa | 0|multiple_choice_grade|0.3566|± |0.0299|
|
111 |
+
|bigbench_geometric_shapes | 0|multiple_choice_grade|0.2006|± |0.0212|
|
112 |
+
| | |exact_str_match |0.0334|± |0.0095|
|
113 |
+
|bigbench_logical_deduction_five_objects | 0|multiple_choice_grade|0.3020|± |0.0206|
|
114 |
+
|bigbench_logical_deduction_seven_objects | 0|multiple_choice_grade|0.2086|± |0.0154|
|
115 |
+
|bigbench_logical_deduction_three_objects | 0|multiple_choice_grade|0.5033|± |0.0289|
|
116 |
+
|bigbench_movie_recommendation | 0|multiple_choice_grade|0.4220|± |0.0221|
|
117 |
+
|bigbench_navigate | 0|multiple_choice_grade|0.5000|± |0.0158|
|
118 |
+
|bigbench_reasoning_about_colored_objects | 0|multiple_choice_grade|0.7035|± |0.0102|
|
119 |
+
|bigbench_ruin_names | 0|multiple_choice_grade|0.4107|± |0.0233|
|
120 |
+
|bigbench_salient_translation_error_detection | 0|multiple_choice_grade|0.2154|± |0.0130|
|
121 |
+
|bigbench_snarks | 0|multiple_choice_grade|0.7127|± |0.0337|
|
122 |
+
|bigbench_sports_understanding | 0|multiple_choice_grade|0.6988|± |0.0146|
|
123 |
+
|bigbench_temporal_sequences | 0|multiple_choice_grade|0.4670|± |0.0158|
|
124 |
+
|bigbench_tracking_shuffled_objects_five_objects | 0|multiple_choice_grade|0.2072|± |0.0115|
|
125 |
+
|bigbench_tracking_shuffled_objects_seven_objects| 0|multiple_choice_grade|0.1731|± |0.0090|
|
126 |
+
|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|0.5033|± |0.0289|
|
127 |
+
```
|
128 |
+
|
129 |
+
Average: 0.4338 (mean of the 18 `multiple_choice_grade` scores; the `exact_str_match` row is excluded)
|
130 |
+
|
131 |
## GPT4All
|
132 |
|
133 |
```
|