eduagarcia committed on
Commit
4445ad2
1 Parent(s): 71ecfbb

Refactor Tasks to load from a YAML configuration file

Browse files
Files changed (2) hide show
  1. src/display/utils.py +4 -55
  2. tasks_config.yaml +52 -0
src/display/utils.py CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
  from typing import List
4
  import pandas as pd
 
5
  from src.envs import GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS
6
 
7
  def fields(raw_class):
@@ -21,61 +22,9 @@ class Task:
21
  link: str = None
22
  description: str = None
23
 
24
- class Tasks(Enum):
25
- oab_exams = Task(
26
- benchmark="oab_exams",
27
- metric="exact_match",
28
- col_name="OAB Exams",
29
- baseline=25.0,
30
- human_baseline=50.0,
31
- few_shot=5,
32
- limit=None,
33
- task_list=["oab_exams_generate"],
34
- link="https://huggingface.co/datasets/eduagarcia/oab_exams",
35
- description="OAB Exams is a dataset of 2,000 questions from the Brazilian Bar Association's exams."
36
- )
37
- brazilian_court_decisions_judgment = Task(
38
- benchmark="brazilian_court_decisions_judgment",
39
- metric="f1_macro",
40
- col_name="BR Court Decisions",
41
- baseline=33.33,
42
- human_baseline=100.0,
43
- few_shot=5,
44
- limit=None,
45
- task_list=["brazilian_court_decisions_judgment_generate"],
46
- link="https://huggingface.co/datasets/joelniklaus/brazilian_court_decisions",
47
- description="A classification dataset of court decisions from the Tribunal de Justiça de Alagoas (TJAL, the State Supreme Court of Alagoas (Brazil)."
48
- )
49
- datalawyer_frases = Task(
50
- benchmark="datalawyer_frases",
51
- metric="f1_macro",
52
- col_name="DL Frases",
53
- baseline=10.0,
54
- human_baseline=100.0,
55
- few_shot=15,
56
- limit=2000,
57
- task_list=["datalawyer_frases_generate"],
58
- link="https://huggingface.co/datasets/eduagarcia/portuguese_benchmark",
59
- description="A classification dataset"
60
- )
61
- rrip = Task(
62
- benchmark="rrip",
63
- metric="f1_macro",
64
- col_name="RRIP",
65
- baseline=12.5,
66
- human_baseline=100.0,
67
- few_shot=15,
68
- limit=None,
69
- task_list=["rrip_generate"],
70
- link="https://huggingface.co/datasets/eduagarcia/portuguese_benchmark",
71
- description="A classification dataset"
72
- )
73
- #arc = Task("arc:challenge", "acc_norm", "ARC", 25.0, 80.0)
74
- #hellaswag = Task("hellaswag", "acc_norm", "HellaSwag", 25.0, 95.0)
75
- #mmlu = Task("hendrycksTest", "acc", "MMLU", 25.0, 89.8)
76
- #truthfulqa = Task("truthfulqa:mc", "mc2", "TruthfulQA", 25.0, 94.0)
77
- #winogrande = Task("winogrande", "acc", "Winogrande", 50.0, 94.0)
78
- #gsm8k = Task("gsm8k", "acc", "GSM8K", 0.21, 100)
79
 
80
  # These classes are for user facing column names,
81
  # to avoid having to change them all around the code
 
2
  from enum import Enum
3
  from typing import List
4
  import pandas as pd
5
+ from yaml import safe_load
6
  from src.envs import GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS
7
 
8
  def fields(raw_class):
 
22
  link: str = None
23
  description: str = None
24
 
25
+ with open('tasks_config.yaml', 'r', encoding='utf-8') as f:
26
+ tasks_config = safe_load(f)
27
+ Tasks = Enum('Tasks', {k: Task(**v) for k, v in tasks_config['tasks'].items()})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # These classes are for user facing column names,
30
  # to avoid having to change them all around the code
tasks_config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 0.0.4
2
+ tasks:
3
+ oab_exams:
4
+ benchmark: oab_exams
5
+ col_name: OAB Exams
6
+ task_list:
7
+ - oab_exams_generate
8
+ metric: exact_match
9
+ few_shot: 5
10
+ limit: null
11
+ baseline: 25.0
12
+ human_baseline: 50.0
13
+ description: OAB Exams is a dataset of 2,000 questions from the Brazilian Bar
14
+ Association's exams.
15
+ link: https://huggingface.co/datasets/eduagarcia/oab_exams
16
+ brazilian_court_decisions_judgment:
17
+ benchmark: brazilian_court_decisions_judgment
18
+ col_name: BR Court Decisions
19
+ task_list:
20
+ - brazilian_court_decisions_judgment_generate
21
+ metric: f1_macro
22
+ few_shot: 5
23
+ limit: null
24
+ baseline: 33.33
25
+ human_baseline: 100.0
26
+ description: A classification dataset of court decisions from the Tribunal de
27
+ Justiça de Alagoas (TJAL, the State Supreme Court of Alagoas (Brazil).
28
+ link: https://huggingface.co/datasets/joelniklaus/brazilian_court_decisions
29
+ datalawyer_frases:
30
+ benchmark: datalawyer_frases
31
+ col_name: DL Frases
32
+ task_list:
33
+ - datalawyer_frases_generate
34
+ metric: f1_macro
35
+ few_shot: 15
36
+ limit: 2000
37
+ baseline: 10.0
38
+ human_baseline: 100.0
39
+ description: A classification dataset
40
+ link: https://huggingface.co/datasets/eduagarcia/portuguese_benchmark
41
+ rrip:
42
+ benchmark: rrip
43
+ col_name: RRIP
44
+ task_list:
45
+ - rrip_generate
46
+ metric: f1_macro
47
+ few_shot: 15
48
+ limit: null
49
+ baseline: 12.5
50
+ human_baseline: 100.0
51
+ description: A classification dataset
52
+ link: https://huggingface.co/datasets/eduagarcia/portuguese_benchmark