Update README.md
Browse files
README.md
CHANGED
@@ -34,7 +34,7 @@ seperator = "\n\n"
|
|
34 |
dtype = torch.bfloat16
|
35 |
|
36 |
model_path = "path/to/llama-2-70b-hf"
|
37 |
-
qlora_path = "path/to/dromedary-2-70b-qlora-delta-v0"
|
38 |
|
39 |
bnb_config = BitsAndBytesConfig(
|
40 |
load_in_4bit=True,
|
@@ -96,4 +96,7 @@ The primary intended users of the model are researchers in artificial intelligen
|
|
96 |
7.5K unlabeled prompts from MATH
|
97 |
|
98 |
## Evaluation dataset
|
99 |
-
We evaluate Dromedary on
|
|
|
|
|
|
|
|
34 |
dtype = torch.bfloat16
|
35 |
|
36 |
model_path = "path/to/llama-2-70b-hf"
|
37 |
+
qlora_path = "path/to/dromedary-2-70b-qlora-delta-v0"  # i.e., this model repository on the Hub
|
38 |
|
39 |
bnb_config = BitsAndBytesConfig(
|
40 |
load_in_4bit=True,
|
|
|
96 |
7.5K unlabeled prompts from MATH
|
97 |
|
98 |
## Evaluation dataset
|
99 |
+
We evaluate Dromedary-2 on:
|
100 |
+
1. Chatbot benchmarks: Vicuna-Bench, MT-Bench, AlpacaEval
|
101 |
+
2. Capability benchmarks: Big-Bench Hard (reasoning), HumanEval (coding), TyDi QA (multilingualism)
|
102 |
+
3. Truthfulness benchmarks: TruthfulQA
|