t0-0 committed on
Commit
cf10aa9
1 Parent(s): 567d2b9

Add information to Task

Browse files
Files changed (2) hide show
  1. src/about.py +82 -53
  2. src/display/utils.py +12 -2
src/about.py CHANGED
@@ -2,78 +2,107 @@ from dataclasses import dataclass
2
  from enum import Enum
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  @dataclass
6
  class Task:
7
  benchmark: str
8
  metric: str
9
  col_name: str
 
 
10
 
11
 
12
  # Select your tasks here
13
  # ---------------------------------------------------
14
  class Tasks(Enum):
15
- AVG = Task("scores", "AVG", "AVG")
16
- CG = Task("scores", "CG", "CG - コード生成") # Code Generation - コード生成
17
- EL = Task("scores", "EL", "EL - エンティティリンキング") # Entity Linking - エンティティリンキング
18
- FA = Task("scores", "FA", "FA - 基礎分析") # Fundamental Analysis - 基礎分析
19
- HE = Task("scores", "HE", "HE - 言語理解") # Human Examination - 言語理解
20
- MC = Task("scores", "MC", "MC - 多肢選択式質問応答") # Multiple Choice question answering - 多肢選択式質問応答
21
- MR = Task("scores", "MR", "MR - 数学的推論") # Mathematical Reasoning - 数学的推論
22
- MT = Task("scores", "MT", "MT - 機械翻訳") # Machine Translation - 機械翻訳
23
- NLI = Task("scores", "NLI", "NLI - 自然言語推論") # Natural Language Inference - 自然言語推論
24
- QA = Task("scores", "QA", "QA - 質問応答") # Question Answering - 質問応答
25
- RC = Task("scores", "RC", "RC - 読解力") # Reading Comprehension - 読解力
26
- SUM = Task("scores", "SUM", "SUM - 要約") # Summarization - 要約
27
- alt_e_to_j_bert_score_ja_f1 = Task("scores", "alt-e-to-j_bert_score_ja_f1", "ALT E to J BERT Score")
28
- alt_e_to_j_bleu_ja = Task("scores", "alt-e-to-j_bleu_ja", "ALT E to J BLEU")
29
- alt_e_to_j_comet_wmt22 = Task("scores", "alt-e-to-j_comet_wmt22", "ALT E to J COMET WMT22")
30
- alt_j_to_e_bert_score_en_f1 = Task("scores", "alt-j-to-e_bert_score_en_f1", "ALT J to E BERT Score")
31
- alt_j_to_e_bleu_en = Task("scores", "alt-j-to-e_bleu_en", "ALT J to E BLEU")
32
- alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22")
33
- chabsa_set_f1 = Task("scores", "chabsa_set_f1", "ChABSA")
34
- commonsensemoralja_exact_match = Task("scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA")
35
- jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP")
36
- janli_exact_match = Task("scores", "janli_exact_match", "JANLI")
37
- jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA")
38
- jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA")
39
- jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU")
40
- jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI")
41
- jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM")
42
- jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK")
43
- jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad")
 
 
 
 
 
 
44
  jsts_pearson = Task(
45
- "scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度"
46
  ) # Semantic Textual Similarity - 意味的類似度
47
  jsts_spearman = Task(
48
- "scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度"
49
  ) # Semantic Textual Similarity - 意味的類似度
50
- kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI")
51
- mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS")
52
- mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP(exec)")
53
- mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP(pylint)")
54
- mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU")
55
- niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC")
56
- wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference")
57
- wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency")
58
- wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER")
59
- wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS")
60
- wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading")
61
  wikicorpus_e_to_j_bert_score_ja_f1 = Task(
62
- "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score"
 
 
 
 
63
  )
64
- wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU")
65
- wikicorpus_e_to_j_comet_wmt22 = Task("scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22")
66
  wikicorpus_j_to_e_bert_score_en_f1 = Task(
67
- "scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score"
 
 
 
 
68
  )
69
- wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU")
70
- wikicorpus_j_to_e_comet_wmt22 = Task("scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22")
71
- xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score")
72
- xlsum_ja_bleu_ja = Task("scores", "xlsum_ja_bleu_ja", "XL-Sum JA BLEU")
73
- xlsum_ja_rouge1 = Task("scores", "xlsum_ja_rouge1", "XL-Sum ROUGE1")
74
- xlsum_ja_rouge2 = Task("scores", "xlsum_ja_rouge2", "XL-Sum ROUGE2")
75
  # xlsum_ja_rouge2_scaling = Task("scores", "xlsum_ja_rouge2_scaling", "XL-Sum JA ROUGE2 Scaling")
76
- xlsum_ja_rougeLsum = Task("scores", "xlsum_ja_rougeLsum", "XL-Sum ROUGE-Lsum")
77
 
78
 
79
  NUM_FEWSHOT = 0 # Change with your few shot
 
2
  from enum import Enum
3
 
4
 
5
+ class TaskType(Enum):
6
+ ALL = "ALL"
7
+ NLI = "NLI"
8
+ QA = "QA"
9
+ RC = "RC"
10
+ MC = "MC"
11
+ EL = "EL"
12
+ FA = "FA"
13
+ MR = "MR"
14
+ MT = "MT"
15
+ STS = "STS"
16
+ HE = "HE"
17
+ CG = "CG"
18
+ SUM = "SUM"
19
+ NotTask = "?"
20
+
21
+
22
  @dataclass
23
  class Task:
24
  benchmark: str
25
  metric: str
26
  col_name: str
27
+ task_type: TaskType
28
+ average: bool = False
29
 
30
 
31
  # Select your tasks here
32
  # ---------------------------------------------------
33
  class Tasks(Enum):
34
+ AVG = Task("scores", "AVG", "AVG", TaskType.ALL, True)
35
+ CG = Task("scores", "CG", "CG - コード生成", TaskType.CG, True) # Code Generation - コード生成
36
+ EL = Task(
37
+ "scores", "EL", "EL - エンティティリンキング", TaskType.EL, True
38
+ ) # Entity Linking - エンティティリンキング
39
+ FA = Task("scores", "FA", "FA - 基礎分析", TaskType.FA, True) # Fundamental Analysis - 基礎分析
40
+ HE = Task("scores", "HE", "HE - 言語理解", TaskType.HE, True) # Human Examination - 言語理解
41
+ MC = Task(
42
+ "scores", "MC", "MC - 多肢選択式質問応答", TaskType.MC, True
43
+ ) # Multiple Choice question answering - 多肢選択式質問応答
44
+ MR = Task("scores", "MR", "MR - 数学的推論", TaskType.MR, True) # Mathematical Reasoning - 数学的推論
45
+ MT = Task("scores", "MT", "MT - 機械翻訳", TaskType.MT, True) # Machine Translation - 機械翻訳
46
+ NLI = Task("scores", "NLI", "NLI - 自然言語推論", TaskType.NLI, True) # Natural Language Inference - 自然言語推論
47
+ QA = Task("scores", "QA", "QA - 質問応答", TaskType.QA, True) # Question Answering - 質問応答
48
+ RC = Task("scores", "RC", "RC - 読解力", TaskType.RC, True) # Reading Comprehension - 読解力
49
+ SUM = Task("scores", "SUM", "SUM - 要約", TaskType.SUM, True) # Summarization - 要約
50
+ alt_e_to_j_bert_score_ja_f1 = Task("scores", "alt-e-to-j_bert_score_ja_f1", "ALT E to J BERT Score", TaskType.MT)
51
+ alt_e_to_j_bleu_ja = Task("scores", "alt-e-to-j_bleu_ja", "ALT E to J BLEU", TaskType.MT)
52
+ alt_e_to_j_comet_wmt22 = Task("scores", "alt-e-to-j_comet_wmt22", "ALT E to J COMET WMT22", TaskType.MT)
53
+ alt_j_to_e_bert_score_en_f1 = Task("scores", "alt-j-to-e_bert_score_en_f1", "ALT J to E BERT Score", TaskType.MT)
54
+ alt_j_to_e_bleu_en = Task("scores", "alt-j-to-e_bleu_en", "ALT J to E BLEU", TaskType.MT)
55
+ alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22", TaskType.MT)
56
+ chabsa_set_f1 = Task("scores", "chabsa_set_f1", "ChABSA", TaskType.EL)
57
+ commonsensemoralja_exact_match = Task(
58
+ "scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA", TaskType.MC
59
+ )
60
+ jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP", TaskType.NLI)
61
+ janli_exact_match = Task("scores", "janli_exact_match", "JANLI", TaskType.NLI)
62
+ jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA", TaskType.MC)
63
+ jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA", TaskType.QA)
64
+ jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU", TaskType.HE)
65
+ jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI", TaskType.NLI)
66
+ jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM", TaskType.NLI)
67
+ jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK", TaskType.NLI)
68
+ jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad", TaskType.RC)
69
  jsts_pearson = Task(
70
+ "scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度", TaskType.STS
71
  ) # Semantic Textual Similarity - 意味的類似度
72
  jsts_spearman = Task(
73
+ "scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度", TaskType.STS
74
  ) # Semantic Textual Similarity - 意味的類似度
75
+ kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI", TaskType.MC)
76
+ mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS", TaskType.MR)
77
+ mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP(exec)", TaskType.CG)
78
+ mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP(pylint)", TaskType.CG)
79
+ mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU", TaskType.HE)
80
+ niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC", TaskType.QA)
81
+ wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference", TaskType.FA)
82
+ wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency", TaskType.FA)
83
+ wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER", TaskType.FA)
84
+ wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS", TaskType.FA)
85
+ wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading", TaskType.FA)
86
  wikicorpus_e_to_j_bert_score_ja_f1 = Task(
87
+ "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score", TaskType.MT
88
+ )
89
+ wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU", TaskType.MT)
90
+ wikicorpus_e_to_j_comet_wmt22 = Task(
91
+ "scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22", TaskType.MT
92
  )
 
 
93
  wikicorpus_j_to_e_bert_score_en_f1 = Task(
94
+ "scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score", TaskType.MT
95
+ )
96
+ wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU", TaskType.MT)
97
+ wikicorpus_j_to_e_comet_wmt22 = Task(
98
+ "scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22", TaskType.MT
99
  )
100
+ xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score", TaskType.SUM)
101
+ xlsum_ja_bleu_ja = Task("scores", "xlsum_ja_bleu_ja", "XL-Sum JA BLEU", TaskType.SUM)
102
+ xlsum_ja_rouge1 = Task("scores", "xlsum_ja_rouge1", "XL-Sum ROUGE1", TaskType.SUM)
103
+ xlsum_ja_rouge2 = Task("scores", "xlsum_ja_rouge2", "XL-Sum ROUGE2", TaskType.SUM)
 
 
104
  # xlsum_ja_rouge2_scaling = Task("scores", "xlsum_ja_rouge2_scaling", "XL-Sum JA ROUGE2 Scaling")
105
+ xlsum_ja_rougeLsum = Task("scores", "xlsum_ja_rougeLsum", "XL-Sum ROUGE-Lsum", TaskType.SUM)
106
 
107
 
108
  NUM_FEWSHOT = 0 # Change with your few shot
src/display/utils.py CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
3
 
4
  import pandas as pd
5
 
6
- from src.about import Tasks
7
 
8
 
9
  def fields(raw_class):
@@ -21,6 +21,8 @@ class ColumnContent:
21
  hidden: bool = False
22
  never_hidden: bool = False
23
  dummy: bool = False
 
 
24
 
25
 
26
  ## Leaderboard columns
@@ -31,7 +33,15 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
31
  # Scores
32
  # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
33
  for task in Tasks:
34
- auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 
 
 
 
 
 
 
 
35
  # Model information
36
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
37
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
 
3
 
4
  import pandas as pd
5
 
6
+ from src.about import Tasks, TaskType
7
 
8
 
9
  def fields(raw_class):
 
21
  hidden: bool = False
22
  never_hidden: bool = False
23
  dummy: bool = False
24
+ task_type: TaskType = TaskType.NotTask
25
+ average: bool = False
26
 
27
 
28
  ## Leaderboard columns
 
33
  # Scores
34
  # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
35
  for task in Tasks:
36
+ auto_eval_column_dict.append(
37
+ [
38
+ task.name,
39
+ ColumnContent,
40
+ ColumnContent(
41
+ task.value.col_name, "number", True, task_type=task.value.task_type, average=task.value.average
42
+ ),
43
+ ]
44
+ )
45
  # Model information
46
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
47
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])