Update src/about.py
Browse files- src/about.py +2 -2
src/about.py
CHANGED
@@ -20,7 +20,7 @@ class Tasks(Enum):
|
|
20 |
task4 = Task("parsinlu_mc", "acc", "ParsiNLU MCQA")
|
21 |
task5 = Task("parsinlu_nli", "acc", "ParsiNLU NLI")
|
22 |
task6 = Task("parsinlu_qqp", "acc", "ParsiNLU QQP")
|
23 |
-
task7 = Task("persian_ARC", "acc", "Persian ARC")
|
24 |
|
25 |
NUM_FEWSHOT = 0 # Change with your few shot
|
26 |
# ---------------------------------------------------
|
@@ -62,7 +62,7 @@ We use our own framework to evaluate the models on the following benchmarks (TO
|
|
62 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU MCQA </a> (0-shot) - a series of multiple-choice questions in domains of *literature*, *math & logic*, and *common knowledge*.
|
63 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU NLI </a> (max[0,3,5,10]-shot) - a 3-way classification to determine whether a hypothesis sentence entails, contradicts, or is neutral with respect to a given premise sentence.
|
64 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU QQP </a> (max[0,2,5,10]-shot) - task of deciding whether two given questions are paraphrases of each other or not.
|
65 |
-
- <a href="https://huggingface.co/datasets/MatinaLLM/persian_arc" target="_blank"> Persian ARC
|
66 |
|
67 |
For all these evaluations, a higher score is a better score.
|
68 |
|
|
|
20 |
task4 = Task("parsinlu_mc", "acc", "ParsiNLU MCQA")
|
21 |
task5 = Task("parsinlu_nli", "acc", "ParsiNLU NLI")
|
22 |
task6 = Task("parsinlu_qqp", "acc", "ParsiNLU QQP")
|
23 |
+
task7 = Task("persian_ARC", "acc", "Persian ARC-C")
|
24 |
|
25 |
NUM_FEWSHOT = 0 # Change with your few shot
|
26 |
# ---------------------------------------------------
|
|
|
62 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU MCQA </a> (0-shot) - a series of multiple-choice questions in domains of *literature*, *math & logic*, and *common knowledge*.
|
63 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU NLI </a> (max[0,3,5,10]-shot) - a 3-way classification to determine whether a hypothesis sentence entails, contradicts, or is neutral with respect to a given premise sentence.
|
64 |
- <a href="https://arxiv.org/abs/2012.06154" target="_blank"> ParsiNLU QQP </a> (max[0,2,5,10]-shot) - task of deciding whether two given questions are paraphrases of each other or not.
|
65 |
+
- <a href="https://huggingface.co/datasets/MatinaLLM/persian_arc" target="_blank"> Persian ARC-C</a> (0-shot) - <a href="https://huggingface.co/datasets/allenai/ai2_arc" target="_blank"> ARC (challenging subset) </a> dataset translated to Persian using GPT-4o.
|
66 |
|
67 |
For all these evaluations, a higher score is a better score.
|
68 |
|