|
import argparse |
|
import csv |
|
import json |
|
import os |
|
from glob import glob |
|
|
|
from tqdm import tqdm |
|
|
|
|
|
def extract_predictions_from_json(input_folder):
    """Export each model's predictions to a one-column submission CSV.

    For every entry under ``<input_folder>/predictions`` this reads the
    ``alignment_bench_*.json`` split files (ordered by their numeric
    suffix) or, when no usable split files exist, the single
    ``alignment_bench.json`` file. The ``'prediction'`` field of every
    record is collected and written, one value per row, to
    ``<input_folder>/submission/<model_name>_submission.csv``.

    Entries that contain neither kind of prediction file are reported and
    skipped instead of producing an empty submission.

    Args:
        input_folder: Folder that contains the ``predictions`` sub-folder;
            the ``submission`` sub-folder is created inside it if needed.

    Raises:
        OSError: If the ``predictions`` folder cannot be listed or a file
            cannot be read or written.
        KeyError: If a record has no ``'prediction'`` key.
    """
    sub_folder = os.path.join(input_folder, 'submission')
    pred_folder = os.path.join(input_folder, 'predictions')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(sub_folder, exist_ok=True)

    def split_index(path):
        # Parse the numeric suffix from the file name only, so that '.json'
        # or '_' occurring in a parent directory name cannot confuse it.
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem.rsplit('_', 1)[-1])

    for model_name in os.listdir(pred_folder):
        model_folder = os.path.join(pred_folder, model_name)

        # Split outputs: alignment_bench_0.json, alignment_bench_1.json, ...
        json_paths = glob(
            os.path.join(model_folder, 'alignment_bench_*.json'))
        try:
            json_paths = sorted(json_paths, key=split_index)
        except ValueError as e:
            # A suffix was not an integer: fall back to the single file.
            print(e)
            json_paths = []

        if not json_paths:
            # glob() returns an empty list rather than raising, so the
            # single-file fallback has to be triggered explicitly.
            single_file = os.path.join(model_folder, 'alignment_bench.json')
            if not os.path.isfile(single_file):
                print('No alignment_bench predictions found in {}, '
                      'skipped'.format(model_folder))
                continue
            json_paths = [single_file]

        all_predictions = []
        for json_path in json_paths:
            # Context manager closes the handle; JSON text is UTF-8.
            with open(json_path, encoding='utf-8') as json_file:
                json_data = json.load(json_file)
            all_predictions.extend(
                value['prediction'] for value in json_data.values())

        output_path = os.path.join(sub_folder, model_name + '_submission.csv')
        # newline='' is what the csv module requires; without it rows get
        # '\r\r\n' line endings on Windows.
        with open(output_path, 'w', encoding='utf-8-sig', newline='') as file:
            writer = csv.writer(file)
            for ans in tqdm(all_predictions):
                writer.writerow([str(ans)])
        print('Saved {} for submission'.format(output_path))
|
|
|
|
|
def process_jsonl(file_path):
    """Convert a JSON Lines file into a list of reshaped record dicts.

    Every non-blank line is parsed as one JSON object and re-mapped to::

        {
            'question': <question>,
            'capability': <category>,
            'others': {
                'subcategory': <subcategory>,
                'reference': <reference>,
                'question_id': <question_id>,
            },
        }

    Args:
        file_path: Path of the UTF-8 encoded ``.jsonl`` file to read.

    Returns:
        A list with one converted dict per non-blank input line, in file
        order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the fields read above.
    """
    new_data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (for example an empty line at the end of the
            # file) carry no record; json.loads would raise on them.
            if not line.strip():
                continue
            json_data = json.loads(line)
            new_data.append({
                'question': json_data['question'],
                'capability': json_data['category'],
                'others': {
                    'subcategory': json_data['subcategory'],
                    'reference': json_data['reference'],
                    'question_id': json_data['question_id'],
                },
            })
    return new_data
|
|
|
|
|
def save_as_json(data, output_file='./alignment_bench.json'):
    """Write ``data`` to ``output_file`` as pretty-printed UTF-8 JSON.

    The output uses a 4-space indent and keeps non-ASCII characters as-is
    instead of escaping them.

    Args:
        data: JSON-serialisable object to store.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, 'w', encoding='utf-8') as handle:
        handle.write(json.dumps(data, indent=4, ensure_ascii=False))
|
|
|
|
|
def parse_args():
    """Parse the converter's command-line options.

    Returns:
        The ``argparse.Namespace`` with the attributes ``mode``, ``jsonl``,
        ``json`` and ``exp_folder`` (``None`` when ``--exp-folder`` is not
        given).
    """
    option_specs = (
        ('--mode', {
            'default': 'json',
            'help': 'The mode of convert to json or convert to csv',
        }),
        ('--jsonl', {
            'default': './data_release.jsonl',
            'help': 'The original jsonl path',
        }),
        ('--json', {
            'default': 'your prediction file path',
            'help': 'The results json path',
        }),
        ('--exp-folder', {
            'help': 'The results json name',
        }),
    )

    parser = argparse.ArgumentParser(description='File Converter')
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: dispatch on --mode.
    #   json: convert the original jsonl file into alignment_bench.json
    #   csv:  export the predictions under --exp-folder to submission CSVs
    args = parse_args()
    mode = args.mode
    if mode == 'json':
        processed_data = process_jsonl(args.jsonl)
        save_as_json(processed_data)
    elif mode == 'csv':
        # Without this check a missing --exp-folder surfaces as an opaque
        # TypeError from os.path.join(None, ...).
        if not args.exp_folder:
            raise SystemExit('--exp-folder is required when --mode is csv')
        extract_predictions_from_json(args.exp_folder)
    else:
        # An unrecognised mode used to do nothing and exit successfully.
        raise SystemExit(
            "Unknown --mode '{}': expected 'json' or 'csv'".format(mode))
|
|