AmeyaPrabhu
committed on
Commit
•
eb87bf5
1
Parent(s):
77f5a8d
Update contamination_report.csv
Browse files - contamination_report.csv +8 -1
contamination_report.csv
CHANGED
@@ -467,13 +467,20 @@ RadNLI;;GPT-3.5;model;0.0;0.0;0.0;model-based;https://arxiv.org/pdf/2308.08493;8
|
|
467 |
cais/mmlu;;GPT-3.5;model;;;52.0;model-based;https://arxiv.org/abs/2311.09783;10
|
468 |
winogrande;;GPT-3.5;model;;;9.0;model-based;https://arxiv.org/abs/2311.09783;10
|
469 |
truthful_qa;;GPT-3.5;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
470 |
|
471 |
cais/mmlu;;GPT-4;model;;;57.0;model-based;https://arxiv.org/abs/2311.09783;10
|
472 |
truthful_qa;;GPT-4;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
473 |
winogrande;;GPT-4;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
|
|
|
474 |
|
475 |
allenai/openbookqa;;LLaMa 2-13B;model;;;4.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
|
476 |
|
477 |
truthful_qa;;Mistral-7B;model;;;15.0;model-based;https://arxiv.org/abs/2311.09783;10
|
478 |
allenai/openbookqa;;Mistral-7B;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
479 |
-
winogrande;;Mistral-7B;model;;;3.0;model-based;https://arxiv.org/abs/2311.09783;10
|
|
|
|
467 |
cais/mmlu;;GPT-3.5;model;;;52.0;model-based;https://arxiv.org/abs/2311.09783;10
|
468 |
winogrande;;GPT-3.5;model;;;9.0;model-based;https://arxiv.org/abs/2311.09783;10
|
469 |
truthful_qa;;GPT-3.5;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
470 |
+
allenai/openbookqa;;GPT-3.5;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|
471 |
|
472 |
cais/mmlu;;GPT-4;model;;;57.0;model-based;https://arxiv.org/abs/2311.09783;10
|
473 |
truthful_qa;;GPT-4;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
474 |
winogrande;;GPT-4;model;;;12.0;model-based;https://arxiv.org/abs/2311.09783;10
|
475 |
+
allenai/openbookqa;;GPT-4;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|
476 |
+
Rowan/hellaswag;;GPT-4;model;;;2.0;model-based;https://arxiv.org/abs/2311.09783;10
|
477 |
+
|
478 |
|
479 |
allenai/openbookqa;;LLaMa 2-13B;model;;;4.0;model-based;https://arxiv.org/abs/2311.09783;10
|
480 |
+
truthful_qa;;LLaMa 2-13B;model;;;2.0;model-based;https://arxiv.org/abs/2311.09783;10
|
481 |
+
winogrande;;LLaMa 2-13B;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|
482 |
|
483 |
truthful_qa;;Mistral-7B;model;;;15.0;model-based;https://arxiv.org/abs/2311.09783;10
|
484 |
allenai/openbookqa;;Mistral-7B;model;;;10.0;model-based;https://arxiv.org/abs/2311.09783;10
|
485 |
+
winogrande;;Mistral-7B;model;;;3.0;model-based;https://arxiv.org/abs/2311.09783;10
|
486 |
+
cais/mmlu;;Mistral-7B;model;;;1.0;model-based;https://arxiv.org/abs/2311.09783;10
|