api-demo / opencompass-my-api /tools /convert_alignmentbench.py
TwT-6's picture
Upload 2667 files
256a159 verified
raw
history blame
3.24 kB
import argparse
import csv
import json
import os
from glob import glob
from tqdm import tqdm
def extract_predictions_from_json(input_folder):
sub_folder = os.path.join(input_folder, 'submission')
pred_folder = os.path.join(input_folder, 'predictions')
if not os.path.exists(sub_folder):
os.makedirs(sub_folder)
for model_name in os.listdir(pred_folder):
model_folder = os.path.join(pred_folder, model_name)
try:
# when use split
json_paths = glob(
os.path.join(model_folder, 'alignment_bench_*.json'))
# sorted by index
json_paths = sorted(
json_paths,
key=lambda x: int(x.split('.json')[0].split('_')[-1]))
except Exception as e:
# when only one complete file
print(e)
json_paths = [os.path.join(model_folder, 'alignment_bench.json')]
all_predictions = []
for json_ in json_paths:
json_data = json.load(open(json_))
for _, value in json_data.items():
prediction = value['prediction']
all_predictions.append(prediction)
# for prediction
output_path = os.path.join(sub_folder, model_name + '_submission.csv')
with open(output_path, 'w', encoding='utf-8-sig') as file:
writer = csv.writer(file)
for ans in tqdm(all_predictions):
writer.writerow([str(ans)])
print('Saved {} for submission'.format(output_path))
def process_jsonl(file_path):
new_data = []
with open(file_path, 'r', encoding='utf-8') as file:
for line in file:
json_data = json.loads(line)
new_dict = {
'question': json_data['question'],
'capability': json_data['category'],
'others': {
'subcategory': json_data['subcategory'],
'reference': json_data['reference'],
'question_id': json_data['question_id']
}
}
new_data.append(new_dict)
return new_data
def save_as_json(data, output_file='./alignment_bench.json'):
with open(output_file, 'w', encoding='utf-8') as file:
json.dump(data, file, indent=4, ensure_ascii=False)
def parse_args():
parser = argparse.ArgumentParser(description='File Converter')
parser.add_argument('--mode',
default='json',
help='The mode of convert to json or convert to csv')
parser.add_argument('--jsonl',
default='./data_release.jsonl',
help='The original jsonl path')
parser.add_argument('--json',
default='your prediction file path',
help='The results json path')
parser.add_argument('--exp-folder', help='The results json name')
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
mode = args.mode
if mode == 'json':
processed_data = process_jsonl(args.jsonl)
save_as_json(processed_data)
elif mode == 'csv':
extract_predictions_from_json(args.exp_folder)