File size: 2,991 Bytes
6a732ce 9691525 6a732ce 9691525 6a732ce 9691525 6a732ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import os
import json
import wandb
import argparse
# Directory containing this script (symlinks resolved); output paths in main()
# are built relative to it.
curr_dir = os.path.dirname(
    os.path.realpath(__file__)
)

# Language codes that main() iterates over; each one is appended to the
# benchmark name to form the per-language result key.
LANGUAGES = [
    'amh', 'eng', 'ewe', 'fra', 'hau', 'ibo',
    'kin', 'lin', 'lug', 'orm', 'sna', 'sot',
    'swa', 'twi', 'wol', 'xho', 'yor', 'zul',
]

# Maps a benchmark name (the --benchmark value) to the W&B project path that
# main() passes to api.runs().
# NOTE: the identifier's spelling is kept as-is because main() refers to it.
BENCHMARCK2PROJECT = {
    "afrimmlu_direct":
        "african-research-collective/llm-evaluation-afrimmlu-direct",
    "afrimmlu_translate":
        "african-research-collective/llm-evaluation-afrimmlu-translate",
}

# Version number recorded in the 'versions' section of every exported file.
BENCHMARK_VERSION = 1.0
def _extract_config(run):
    """Return the settings from ``run.config['cli_configs']`` stored in each file.

    Raises:
        KeyError: if ``cli_configs`` or any of the copied settings is missing.
    """
    cli_configs = run.config['cli_configs']
    wanted = (
        'model', 'model_args', 'batch_size', 'device', 'model_dtype',
        'numpy_seed', 'torch_seed', 'random_seed', 'fewshot_seed',
    )
    return {name: cli_configs[name] for name in wanted}


def _pretrained_model_name(model_args):
    """Derive the model name used in output file names from ``model_args``.

    Takes the first comma-separated entry, reads the text following its first
    ``=`` (up to a second ``=``, if any) and keeps the part after the last ``/``.

    Raises:
        ValueError: if the first entry contains no ``=``.
    """
    pieces = model_args.split(',')[0].split('=')
    if len(pieces) < 2:
        raise ValueError(f"cannot find a model name in model_args {model_args!r}")
    return pieces[1].split('/')[-1]


def _extract_metrics(summary, lang_result_key):
    """Collect the four metrics for one language from a run summary mapping.

    Raises:
        KeyError: if any ``<lang_result_key>/<metric>`` entry is missing.
    """
    metric_names = ('acc', 'acc_stderr', 'f1', 'f1_stderr')
    return {
        metric: summary[f'{lang_result_key}/{metric}']
        for metric in metric_names
    }


def main(args):
    """Export per-language benchmark results from W&B runs to JSON files.

    For every run in the W&B project mapped to ``args.benchmark`` and every
    language in ``LANGUAGES``, writes
    ``evals/<benchmark>/<benchmark>_<lang>-<model>.json`` (relative to this
    script's directory) containing ``results``, ``versions`` and ``config``
    sections. The output directory is created if it does not exist.

    A run whose configuration is incomplete, or a language whose metrics are
    missing from the run summary, is reported on stdout and skipped.

    Args:
        args: Parsed command-line namespace; only ``args.benchmark`` is read.

    Raises:
        KeyError: if ``args.benchmark`` is not a key of ``BENCHMARCK2PROJECT``.
    """
    api = wandb.Api()
    runs = api.runs(BENCHMARCK2PROJECT[args.benchmark])
    print(runs)

    # open(..., 'w') does not create parent directories, so make sure the
    # target directory exists before any file is written.
    output_dir = os.path.join(curr_dir, f"evals/{args.benchmark}")
    os.makedirs(output_dir, exist_ok=True)

    for run in runs:
        # Everything in this block is the same for all languages of a run, so
        # it is computed once; a problem here skips the whole run.
        try:
            # .summary contains the output keys/values for metrics like accuracy.
            # We call ._json_dict to omit large files
            summary = run.summary._json_dict
            config = _extract_config(run)
            pretrained_model = _pretrained_model_name(config['model_args'])
        except (KeyError, ValueError) as e:
            print(f"{type(e).__name__}: {e}")
            continue

        for lang in LANGUAGES:
            lang_result_key = f'{args.benchmark}_{lang}'
            try:
                metrics = _extract_metrics(summary, lang_result_key)
            except KeyError as e:
                # This run has no (complete) results for this language.
                print(f"KeyError: {e}")
                continue

            final_json_object = {
                'results': {lang_result_key: metrics},
                'versions': {
                    lang_result_key: BENCHMARK_VERSION,
                    'wandb_run_name': run.name,
                },
                'config': config,
            }

            # The file name depends only on benchmark, language and model, so
            # a later run for the same model overwrites an earlier one.
            output_path = os.path.join(
                curr_dir,
                f"evals/{args.benchmark}/{args.benchmark}_{lang}-{pretrained_model}.json",
            )
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(final_json_object, f, indent=2)
# Script entry point: parse the benchmark name and run the export.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Export benchmark results from W&B runs to per-language JSON files.',
    )
    parser.add_argument(
        '--benchmark',
        type=str,
        required=True,
        # Restrict to known benchmarks so that a typo is rejected by argparse
        # with a usage message instead of a KeyError traceback inside main().
        choices=sorted(BENCHMARCK2PROJECT),
        help='Benchmark whose W&B project should be exported.',
    )
    args = parser.parse_args()
    main(args)
|