ToluClassics
committed on
Commit
•
9691525
1
Parent(s):
517949c
add mmlu translate
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- content.py +1 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-Meta-Llama-3-8B-Instruct.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-afriteva_v2_large_ayaft.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-aya-101.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-1b7.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-3b.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-7b1.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-xl.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-Meta-Llama-3-8B-Instruct.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-afriteva_v2_large_ayaft.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-aya-101.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-1b7.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-3b.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-7b1.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-xl.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-Meta-Llama-3-8B-Instruct.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-afriteva_v2_large_ayaft.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-aya-101.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-1b7.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-3b.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-7b1.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-xl.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-Meta-Llama-3-8B-Instruct.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-afriteva_v2_large_ayaft.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-aya-101.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-1b7.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-3b.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-7b1.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-base.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-large.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-small.json +25 -0
- evals/afrimmlu_translate/afrimmlu_translate_ibo-mt0-base.json +25 -0
content.py
CHANGED
@@ -3,6 +3,7 @@ TITLE = '<h1 align="center" id="space-title">African Languages LLM Eval Leaderbo
|
|
3 |
INTRO_TEXT = f"""
|
4 |
## About
|
5 |
This leaderboard tracks progress and ranks performance of large language models (LLMs) on African languages.
|
|
|
6 |
|
7 |
We currently evaluate models over the following benchmarks:
|
8 |
|
|
|
3 |
INTRO_TEXT = f"""
|
4 |
## About
|
5 |
This leaderboard tracks progress and ranks performance of large language models (LLMs) on African languages.
|
6 |
+
This project uses the [lm-evaluation-harness by EleutherAI](https://github.com/EleutherAI/lm-evaluation-harness) for evaluation, focusing on African language tasks. Some of the tasks contained in this leaderboard have already been built into the harness (e.g. IrokoBench Tasks & Belebele).
|
7 |
|
8 |
We currently evaluate models over the following benchmarks:
|
9 |
|
evals/afrimmlu_translate/afrimmlu_translate_amh-Meta-Llama-3-8B-Instruct.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.394,
|
5 |
+
"acc_stderr": 0.021874299301689253,
|
6 |
+
"f1": 0.3911411480086645,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "happy-fog-3"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.bfloat16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-afriteva_v2_large_ayaft.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.212,
|
5 |
+
"acc_stderr": 0.01829703700401389,
|
6 |
+
"f1": 0.193400184674494,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "cool-moon-2"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-aya-101.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.32,
|
5 |
+
"acc_stderr": 0.020882340488761805,
|
6 |
+
"f1": 0.321055726365249,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "major-rain-4"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-1b7.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.232,
|
5 |
+
"acc_stderr": 0.018896193591952055,
|
6 |
+
"f1": 0.2303475443171238,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "amber-sponge-5"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-1b7",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-3b.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.252,
|
5 |
+
"acc_stderr": 0.019435727282249543,
|
6 |
+
"f1": 0.2511004641341941,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "atomic-lake-6"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-3b",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-7b1.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.264,
|
5 |
+
"acc_stderr": 0.019732885585922084,
|
6 |
+
"f1": 0.2611884211157269,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "silver-moon-7"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-7b1",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.23,
|
5 |
+
"acc_stderr": 0.01883905039112313,
|
6 |
+
"f1": 0.22635667655752564,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "serene-frost-12"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.306,
|
5 |
+
"acc_stderr": 0.020629569998345417,
|
6 |
+
"f1": 0.30646879281309314,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "whole-serenity-13"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.26,
|
5 |
+
"acc_stderr": 0.019635965529725512,
|
6 |
+
"f1": 0.25428716687048475,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "fresh-moon-14"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.242,
|
5 |
+
"acc_stderr": 0.019173085678337178,
|
6 |
+
"f1": 0.23588653079268743,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "icy-star-8"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.252,
|
5 |
+
"acc_stderr": 0.01943572728224953,
|
6 |
+
"f1": 0.2492758416335272,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "light-cherry-9"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.228,
|
5 |
+
"acc_stderr": 0.018781306529363197,
|
6 |
+
"f1": 0.21952920076616783,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "brisk-sun-10"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-xl.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_amh": {
|
4 |
+
"acc": 0.318,
|
5 |
+
"acc_stderr": 0.020847571620814017,
|
6 |
+
"f1": 0.3106228371529179,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_amh": 1.0,
|
12 |
+
"wandb_run_name": "soft-dew-11"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-xl",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-Meta-Llama-3-8B-Instruct.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.338,
|
5 |
+
"acc_stderr": 0.021175665695209407,
|
6 |
+
"f1": 0.33931031230441916,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "happy-fog-3"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.bfloat16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-afriteva_v2_large_ayaft.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.222,
|
5 |
+
"acc_stderr": 0.01860441475825008,
|
6 |
+
"f1": 0.21571520023861815,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "cool-moon-2"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-aya-101.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.304,
|
5 |
+
"acc_stderr": 0.02059164957122493,
|
6 |
+
"f1": 0.2984881378963239,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "major-rain-4"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-1b7.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.244,
|
5 |
+
"acc_stderr": 0.0192267348936146,
|
6 |
+
"f1": 0.24428173105444703,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "amber-sponge-5"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-1b7",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-3b.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.246,
|
5 |
+
"acc_stderr": 0.019279819056352548,
|
6 |
+
"f1": 0.2468667245003703,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "atomic-lake-6"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-3b",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-7b1.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.272,
|
5 |
+
"acc_stderr": 0.01992048320956609,
|
6 |
+
"f1": 0.27134200495691096,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "silver-moon-7"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-7b1",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.274,
|
5 |
+
"acc_stderr": 0.01996610354027947,
|
6 |
+
"f1": 0.2733405472168771,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "serene-frost-12"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.256,
|
5 |
+
"acc_stderr": 0.019536923574747612,
|
6 |
+
"f1": 0.25476798405989093,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "whole-serenity-13"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.26,
|
5 |
+
"acc_stderr": 0.019635965529725512,
|
6 |
+
"f1": 0.24971217821262703,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "fresh-moon-14"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.226,
|
5 |
+
"acc_stderr": 0.018722956449139933,
|
6 |
+
"f1": 0.21750280479645925,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "icy-star-8"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.238,
|
5 |
+
"acc_stderr": 0.01906407295819844,
|
6 |
+
"f1": 0.23136212205719056,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "light-cherry-9"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.248,
|
5 |
+
"acc_stderr": 0.019332342821239107,
|
6 |
+
"f1": 0.23917170024216244,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "brisk-sun-10"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-xl.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ewe": {
|
4 |
+
"acc": 0.29,
|
5 |
+
"acc_stderr": 0.02031317923174519,
|
6 |
+
"f1": 0.28795219737987676,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ewe": 1.0,
|
12 |
+
"wandb_run_name": "soft-dew-11"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-xl",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-Meta-Llama-3-8B-Instruct.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.36,
|
5 |
+
"acc_stderr": 0.02148775108972053,
|
6 |
+
"f1": 0.3604494892242261,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "happy-fog-3"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.bfloat16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-afriteva_v2_large_ayaft.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.24,
|
5 |
+
"acc_stderr": 0.01911886665375974,
|
6 |
+
"f1": 0.2223787592766505,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "cool-moon-2"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-aya-101.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.308,
|
5 |
+
"acc_stderr": 0.0206670329874661,
|
6 |
+
"f1": 0.3039058851447556,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "major-rain-4"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-1b7.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.274,
|
5 |
+
"acc_stderr": 0.019966103540279445,
|
6 |
+
"f1": 0.2724977541568708,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "amber-sponge-5"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-1b7",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-3b.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.304,
|
5 |
+
"acc_stderr": 0.020591649571224932,
|
6 |
+
"f1": 0.3034697818886015,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "atomic-lake-6"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-3b",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-7b1.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.32,
|
5 |
+
"acc_stderr": 0.020882340488761805,
|
6 |
+
"f1": 0.3200184974536263,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "silver-moon-7"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-7b1",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.248,
|
5 |
+
"acc_stderr": 0.019332342821239103,
|
6 |
+
"f1": 0.24470006662225183,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "serene-frost-12"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.29,
|
5 |
+
"acc_stderr": 0.020313179231745207,
|
6 |
+
"f1": 0.2906470359611149,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "whole-serenity-13"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.278,
|
5 |
+
"acc_stderr": 0.020055833888070897,
|
6 |
+
"f1": 0.27432208144412573,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "fresh-moon-14"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.224,
|
5 |
+
"acc_stderr": 0.01866399446471079,
|
6 |
+
"f1": 0.21520681120974008,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "icy-star-8"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.244,
|
5 |
+
"acc_stderr": 0.01922673489361459,
|
6 |
+
"f1": 0.2370237396867334,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "light-cherry-9"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.22,
|
5 |
+
"acc_stderr": 0.01854421137582033,
|
6 |
+
"f1": 0.2165969551393627,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "brisk-sun-10"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-xl.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_hau": {
|
4 |
+
"acc": 0.252,
|
5 |
+
"acc_stderr": 0.01943572728224952,
|
6 |
+
"f1": 0.24627743028468996,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_hau": 1.0,
|
12 |
+
"wandb_run_name": "soft-dew-11"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-xl",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-Meta-Llama-3-8B-Instruct.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.364,
|
5 |
+
"acc_stderr": 0.02153917063731769,
|
6 |
+
"f1": 0.3620861488890409,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "happy-fog-3"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.bfloat16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-afriteva_v2_large_ayaft.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.214,
|
5 |
+
"acc_stderr": 0.018359797502387025,
|
6 |
+
"f1": 0.19723354975358284,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "cool-moon-2"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-aya-101.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.316,
|
5 |
+
"acc_stderr": 0.020812359515855857,
|
6 |
+
"f1": 0.31581378648091524,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "major-rain-4"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-1b7.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.238,
|
5 |
+
"acc_stderr": 0.019064072958198452,
|
6 |
+
"f1": 0.24129061923077083,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "amber-sponge-5"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-1b7",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-3b.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.252,
|
5 |
+
"acc_stderr": 0.01943572728224953,
|
6 |
+
"f1": 0.2521675138674737,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "atomic-lake-6"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-3b",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-7b1.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.252,
|
5 |
+
"acc_stderr": 0.01943572728224953,
|
6 |
+
"f1": 0.2527994546842154,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "silver-moon-7"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/bloomz-7b1",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float16",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.264,
|
5 |
+
"acc_stderr": 0.01973288558592211,
|
6 |
+
"f1": 0.26186909736627434,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "serene-frost-12"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-large.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.294,
|
5 |
+
"acc_stderr": 0.020395095484936617,
|
6 |
+
"f1": 0.2930675969903471,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "whole-serenity-13"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-large",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-small.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.262,
|
5 |
+
"acc_stderr": 0.019684688820194703,
|
6 |
+
"f1": 0.2533535628301984,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "fresh-moon-14"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=google/flan-t5-small",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|
evals/afrimmlu_translate/afrimmlu_translate_ibo-mt0-base.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"afrimmlu_translate_ibo": {
|
4 |
+
"acc": 0.25,
|
5 |
+
"acc_stderr": 0.019384310743640384,
|
6 |
+
"f1": 0.24592729926179152,
|
7 |
+
"f1_stderr": "N/A"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"afrimmlu_translate_ibo": 1.0,
|
12 |
+
"wandb_run_name": "icy-star-8"
|
13 |
+
},
|
14 |
+
"config": {
|
15 |
+
"model": "hf",
|
16 |
+
"model_args": "pretrained=bigscience/mt0-base",
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": null,
|
19 |
+
"model_dtype": "torch.float32",
|
20 |
+
"numpy_seed": 42,
|
21 |
+
"torch_seed": 42,
|
22 |
+
"random_seed": 42,
|
23 |
+
"fewshot_seed": 42
|
24 |
+
}
|
25 |
+
}
|