Add data from "An Open-Source Data Contamination Report for Large Language Models"
Browse files- contamination_report.csv +6 -0
contamination_report.csv
CHANGED
@@ -1,5 +1,11 @@
|
|
1 |
Evaluation Dataset;Subset;Contaminated Source;Model or corpus;Train Split;Development Split;Test Split;Approach;Reference;PR
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
UCLNLP/adversarial_qa;adversarialQA;allenai/c4;corpus;;;0.03;data-based;https://arxiv.org/abs/2310.20707;2
|
5 |
UCLNLP/adversarial_qa;adversarialQA;oscar-corpus/OSCAR-2301;corpus;;;0.03;data-based;https://arxiv.org/abs/2310.20707;2
|
|
|
1 |
Evaluation Dataset;Subset;Contaminated Source;Model or corpus;Train Split;Development Split;Test Split;Approach;Reference;PR
|
2 |
|
3 |
+
allenai/ai2_arc;;allenai/c4;corpus;;;28.7;data-based;https://arxiv.org/abs/2310.17589;5
|
4 |
+
tau/commonsense_qa;;allenai/c4;corpus;;1.6;;data-based;https://arxiv.org/abs/2310.17589;5
|
5 |
+
winogrande;;allenai/c4;corpus;;1.1;;data-based;https://arxiv.org/abs/2310.17589;5
|
6 |
+
ceval/ceval-exam;;allenai/c4;corpus;;45.8;;data-based;https://arxiv.org/abs/2310.17589;5
|
7 |
+
Rowan/hellaswag;;allenai/c4;corpus;;12.4;;data-based;https://arxiv.org/abs/2310.17589;5
|
8 |
+
cais/mmlu;;allenai/c4;corpus;;;29.1;data-based;https://arxiv.org/abs/2310.17589;5
|
9 |
|
10 |
UCLNLP/adversarial_qa;adversarialQA;allenai/c4;corpus;;;0.03;data-based;https://arxiv.org/abs/2310.20707;2
|
11 |
UCLNLP/adversarial_qa;adversarialQA;oscar-corpus/OSCAR-2301;corpus;;;0.03;data-based;https://arxiv.org/abs/2310.20707;2
|