# Task configuration for the `halueval_qa` evaluation task.
# NOTE(review): nesting below was reconstructed from a flattened copy of this
# file (indentation had been lost); confirm it against the upstream config.

task: halueval_qa

# Dataset location and subset.
dataset_path: pminervini/HaluEval
dataset_name: qa_samples

output_type: generate_until

# Only the `data` split is used; no training or validation split is configured.
training_split: null
validation_split: null
test_split: data

num_fewshot: 0

# Prompt, target and scoring callables referenced through the `!function` tag
# (presumably resolved from a sibling `utils` module - verify against the loader).
doc_to_text: !function utils.doc_to_text_qa
doc_to_target: !function utils.doc_to_target
process_results: !function utils.process_results

generation_kwargs:
  # Strings listed under `until`: a newline or a period.
  until:
    - "\n"
    - "."
  do_sample: false
  temperature: 0.0

# Each metric is averaged (`mean`) and treated as higher-is-better.
metric_list:
  - metric: em
    aggregation: mean
    higher_is_better: true
  - metric: correctness
    aggregation: mean
    higher_is_better: true
  - metric: acc
    aggregation: mean
    higher_is_better: true

metadata:
  version: 0.0
|