ToluClassics committed on
Commit
9691525
1 Parent(s): 517949c

add mmlu translate

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. content.py +1 -0
  2. evals/afrimmlu_translate/afrimmlu_translate_amh-Meta-Llama-3-8B-Instruct.json +25 -0
  3. evals/afrimmlu_translate/afrimmlu_translate_amh-afriteva_v2_large_ayaft.json +25 -0
  4. evals/afrimmlu_translate/afrimmlu_translate_amh-aya-101.json +25 -0
  5. evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-1b7.json +25 -0
  6. evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-3b.json +25 -0
  7. evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-7b1.json +25 -0
  8. evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-base.json +25 -0
  9. evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-large.json +25 -0
  10. evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-small.json +25 -0
  11. evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-base.json +25 -0
  12. evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-large.json +25 -0
  13. evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-small.json +25 -0
  14. evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-xl.json +25 -0
  15. evals/afrimmlu_translate/afrimmlu_translate_ewe-Meta-Llama-3-8B-Instruct.json +25 -0
  16. evals/afrimmlu_translate/afrimmlu_translate_ewe-afriteva_v2_large_ayaft.json +25 -0
  17. evals/afrimmlu_translate/afrimmlu_translate_ewe-aya-101.json +25 -0
  18. evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-1b7.json +25 -0
  19. evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-3b.json +25 -0
  20. evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-7b1.json +25 -0
  21. evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-base.json +25 -0
  22. evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-large.json +25 -0
  23. evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-small.json +25 -0
  24. evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-base.json +25 -0
  25. evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-large.json +25 -0
  26. evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-small.json +25 -0
  27. evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-xl.json +25 -0
  28. evals/afrimmlu_translate/afrimmlu_translate_hau-Meta-Llama-3-8B-Instruct.json +25 -0
  29. evals/afrimmlu_translate/afrimmlu_translate_hau-afriteva_v2_large_ayaft.json +25 -0
  30. evals/afrimmlu_translate/afrimmlu_translate_hau-aya-101.json +25 -0
  31. evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-1b7.json +25 -0
  32. evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-3b.json +25 -0
  33. evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-7b1.json +25 -0
  34. evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-base.json +25 -0
  35. evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-large.json +25 -0
  36. evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-small.json +25 -0
  37. evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-base.json +25 -0
  38. evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-large.json +25 -0
  39. evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-small.json +25 -0
  40. evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-xl.json +25 -0
  41. evals/afrimmlu_translate/afrimmlu_translate_ibo-Meta-Llama-3-8B-Instruct.json +25 -0
  42. evals/afrimmlu_translate/afrimmlu_translate_ibo-afriteva_v2_large_ayaft.json +25 -0
  43. evals/afrimmlu_translate/afrimmlu_translate_ibo-aya-101.json +25 -0
  44. evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-1b7.json +25 -0
  45. evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-3b.json +25 -0
  46. evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-7b1.json +25 -0
  47. evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-base.json +25 -0
  48. evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-large.json +25 -0
  49. evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-small.json +25 -0
  50. evals/afrimmlu_translate/afrimmlu_translate_ibo-mt0-base.json +25 -0
content.py CHANGED
@@ -3,6 +3,7 @@ TITLE = '<h1 align="center" id="space-title">African Languages LLM Eval Leaderbo
3
  INTRO_TEXT = f"""
4
  ## About
5
  This leaderboard tracks progress and ranks performance of large language models (LLMs) on African languages.
 
6
 
7
  We currently evaluate models over the following benchmarks:
8
 
 
3
  INTRO_TEXT = f"""
4
  ## About
5
  This leaderboard tracks progress and ranks performance of large language models (LLMs) on African languages.
6
+ This project uses the [lm-evaluation-harness by EleutherAI](https://github.com/EleutherAI/lm-evaluation-harness) for evaluation, focusing on African language tasks. Some of the tasks contained in this leaderboard have already been built into the harness (e.g. IrokoBench Tasks & Belebele).
7
 
8
  We currently evaluate models over the following benchmarks:
9
 
evals/afrimmlu_translate/afrimmlu_translate_amh-Meta-Llama-3-8B-Instruct.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.394,
5
+ "acc_stderr": 0.021874299301689253,
6
+ "f1": 0.3911411480086645,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "happy-fog-3"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.bfloat16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-afriteva_v2_large_ayaft.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.212,
5
+ "acc_stderr": 0.01829703700401389,
6
+ "f1": 0.193400184674494,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "cool-moon-2"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-aya-101.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.32,
5
+ "acc_stderr": 0.020882340488761805,
6
+ "f1": 0.321055726365249,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "major-rain-4"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-1b7.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.232,
5
+ "acc_stderr": 0.018896193591952055,
6
+ "f1": 0.2303475443171238,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "amber-sponge-5"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-1b7",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-3b.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.252,
5
+ "acc_stderr": 0.019435727282249543,
6
+ "f1": 0.2511004641341941,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "atomic-lake-6"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-3b",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-bloomz-7b1.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.264,
5
+ "acc_stderr": 0.019732885585922084,
6
+ "f1": 0.2611884211157269,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "silver-moon-7"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-7b1",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.23,
5
+ "acc_stderr": 0.01883905039112313,
6
+ "f1": 0.22635667655752564,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "serene-frost-12"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.306,
5
+ "acc_stderr": 0.020629569998345417,
6
+ "f1": 0.30646879281309314,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "whole-serenity-13"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-flan-t5-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.26,
5
+ "acc_stderr": 0.019635965529725512,
6
+ "f1": 0.25428716687048475,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "fresh-moon-14"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.242,
5
+ "acc_stderr": 0.019173085678337178,
6
+ "f1": 0.23588653079268743,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "icy-star-8"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.252,
5
+ "acc_stderr": 0.01943572728224953,
6
+ "f1": 0.2492758416335272,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "light-cherry-9"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.228,
5
+ "acc_stderr": 0.018781306529363197,
6
+ "f1": 0.21952920076616783,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "brisk-sun-10"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_amh-mt0-xl.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_amh": {
4
+ "acc": 0.318,
5
+ "acc_stderr": 0.020847571620814017,
6
+ "f1": 0.3106228371529179,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_amh": 1.0,
12
+ "wandb_run_name": "soft-dew-11"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-xl",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-Meta-Llama-3-8B-Instruct.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.338,
5
+ "acc_stderr": 0.021175665695209407,
6
+ "f1": 0.33931031230441916,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "happy-fog-3"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.bfloat16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-afriteva_v2_large_ayaft.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.222,
5
+ "acc_stderr": 0.01860441475825008,
6
+ "f1": 0.21571520023861815,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "cool-moon-2"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-aya-101.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.304,
5
+ "acc_stderr": 0.02059164957122493,
6
+ "f1": 0.2984881378963239,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "major-rain-4"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-1b7.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.244,
5
+ "acc_stderr": 0.0192267348936146,
6
+ "f1": 0.24428173105444703,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "amber-sponge-5"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-1b7",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-3b.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.246,
5
+ "acc_stderr": 0.019279819056352548,
6
+ "f1": 0.2468667245003703,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "atomic-lake-6"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-3b",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-bloomz-7b1.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.272,
5
+ "acc_stderr": 0.01992048320956609,
6
+ "f1": 0.27134200495691096,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "silver-moon-7"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-7b1",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.274,
5
+ "acc_stderr": 0.01996610354027947,
6
+ "f1": 0.2733405472168771,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "serene-frost-12"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.256,
5
+ "acc_stderr": 0.019536923574747612,
6
+ "f1": 0.25476798405989093,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "whole-serenity-13"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-flan-t5-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.26,
5
+ "acc_stderr": 0.019635965529725512,
6
+ "f1": 0.24971217821262703,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "fresh-moon-14"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.226,
5
+ "acc_stderr": 0.018722956449139933,
6
+ "f1": 0.21750280479645925,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "icy-star-8"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.238,
5
+ "acc_stderr": 0.01906407295819844,
6
+ "f1": 0.23136212205719056,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "light-cherry-9"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.248,
5
+ "acc_stderr": 0.019332342821239107,
6
+ "f1": 0.23917170024216244,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "brisk-sun-10"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ewe-mt0-xl.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ewe": {
4
+ "acc": 0.29,
5
+ "acc_stderr": 0.02031317923174519,
6
+ "f1": 0.28795219737987676,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ewe": 1.0,
12
+ "wandb_run_name": "soft-dew-11"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-xl",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-Meta-Llama-3-8B-Instruct.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.36,
5
+ "acc_stderr": 0.02148775108972053,
6
+ "f1": 0.3604494892242261,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "happy-fog-3"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.bfloat16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-afriteva_v2_large_ayaft.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.24,
5
+ "acc_stderr": 0.01911886665375974,
6
+ "f1": 0.2223787592766505,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "cool-moon-2"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-aya-101.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.308,
5
+ "acc_stderr": 0.0206670329874661,
6
+ "f1": 0.3039058851447556,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "major-rain-4"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-1b7.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.274,
5
+ "acc_stderr": 0.019966103540279445,
6
+ "f1": 0.2724977541568708,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "amber-sponge-5"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-1b7",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-3b.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.304,
5
+ "acc_stderr": 0.020591649571224932,
6
+ "f1": 0.3034697818886015,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "atomic-lake-6"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-3b",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-bloomz-7b1.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.32,
5
+ "acc_stderr": 0.020882340488761805,
6
+ "f1": 0.3200184974536263,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "silver-moon-7"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-7b1",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.248,
5
+ "acc_stderr": 0.019332342821239103,
6
+ "f1": 0.24470006662225183,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "serene-frost-12"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.29,
5
+ "acc_stderr": 0.020313179231745207,
6
+ "f1": 0.2906470359611149,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "whole-serenity-13"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-flan-t5-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.278,
5
+ "acc_stderr": 0.020055833888070897,
6
+ "f1": 0.27432208144412573,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "fresh-moon-14"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.224,
5
+ "acc_stderr": 0.01866399446471079,
6
+ "f1": 0.21520681120974008,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "icy-star-8"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.244,
5
+ "acc_stderr": 0.01922673489361459,
6
+ "f1": 0.2370237396867334,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "light-cherry-9"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.22,
5
+ "acc_stderr": 0.01854421137582033,
6
+ "f1": 0.2165969551393627,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "brisk-sun-10"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_hau-mt0-xl.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_hau": {
4
+ "acc": 0.252,
5
+ "acc_stderr": 0.01943572728224952,
6
+ "f1": 0.24627743028468996,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_hau": 1.0,
12
+ "wandb_run_name": "soft-dew-11"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-xl",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-Meta-Llama-3-8B-Instruct.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.364,
5
+ "acc_stderr": 0.02153917063731769,
6
+ "f1": 0.3620861488890409,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "happy-fog-3"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=meta-llama/Meta-Llama-3-8B-Instruct",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.bfloat16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-afriteva_v2_large_ayaft.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.214,
5
+ "acc_stderr": 0.018359797502387025,
6
+ "f1": 0.19723354975358284,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "cool-moon-2"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=taresco/afriteva_v2_large_ayaft",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-aya-101.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.316,
5
+ "acc_stderr": 0.020812359515855857,
6
+ "f1": 0.31581378648091524,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "major-rain-4"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=CohereForAI/aya-101,parallelize=True",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-1b7.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.238,
5
+ "acc_stderr": 0.019064072958198452,
6
+ "f1": 0.24129061923077083,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "amber-sponge-5"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-1b7",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-3b.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.252,
5
+ "acc_stderr": 0.01943572728224953,
6
+ "f1": 0.2521675138674737,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "atomic-lake-6"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-3b",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-bloomz-7b1.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.252,
5
+ "acc_stderr": 0.01943572728224953,
6
+ "f1": 0.2527994546842154,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "silver-moon-7"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/bloomz-7b1",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float16",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.264,
5
+ "acc_stderr": 0.01973288558592211,
6
+ "f1": 0.26186909736627434,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "serene-frost-12"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-large.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.294,
5
+ "acc_stderr": 0.020395095484936617,
6
+ "f1": 0.2930675969903471,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "whole-serenity-13"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-large",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-flan-t5-small.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.262,
5
+ "acc_stderr": 0.019684688820194703,
6
+ "f1": 0.2533535628301984,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "fresh-moon-14"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=google/flan-t5-small",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }
evals/afrimmlu_translate/afrimmlu_translate_ibo-mt0-base.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "afrimmlu_translate_ibo": {
4
+ "acc": 0.25,
5
+ "acc_stderr": 0.019384310743640384,
6
+ "f1": 0.24592729926179152,
7
+ "f1_stderr": "N/A"
8
+ }
9
+ },
10
+ "versions": {
11
+ "afrimmlu_translate_ibo": 1.0,
12
+ "wandb_run_name": "icy-star-8"
13
+ },
14
+ "config": {
15
+ "model": "hf",
16
+ "model_args": "pretrained=bigscience/mt0-base",
17
+ "batch_size": 8,
18
+ "device": null,
19
+ "model_dtype": "torch.float32",
20
+ "numpy_seed": 42,
21
+ "torch_seed": 42,
22
+ "random_seed": 42,
23
+ "fewshot_seed": 42
24
+ }
25
+ }