Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
t0-0
committed on
Commit
•
cf10aa9
1
Parent(s):
567d2b9
Add information to Task
Browse files
- src/about.py +82 -53
- src/display/utils.py +12 -2
src/about.py
CHANGED
@@ -2,78 +2,107 @@ from dataclasses import dataclass
|
|
2 |
from enum import Enum
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
@dataclass
|
6 |
class Task:
|
7 |
benchmark: str
|
8 |
metric: str
|
9 |
col_name: str
|
|
|
|
|
10 |
|
11 |
|
12 |
# Select your tasks here
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
-
AVG = Task("scores", "AVG", "AVG")
|
16 |
-
CG = Task("scores", "CG", "CG - コード生成") # Code Generation - コード生成
|
17 |
-
EL = Task(
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
jsts_pearson = Task(
|
45 |
-
"scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度"
|
46 |
) # Semantic Textual Similarity - 意味的類似度
|
47 |
jsts_spearman = Task(
|
48 |
-
"scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度"
|
49 |
) # Semantic Textual Similarity - 意味的類似度
|
50 |
-
kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI")
|
51 |
-
mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS")
|
52 |
-
mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP(exec)")
|
53 |
-
mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP(pylint)")
|
54 |
-
mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU")
|
55 |
-
niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC")
|
56 |
-
wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference")
|
57 |
-
wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency")
|
58 |
-
wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER")
|
59 |
-
wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS")
|
60 |
-
wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading")
|
61 |
wikicorpus_e_to_j_bert_score_ja_f1 = Task(
|
62 |
-
"scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score"
|
|
|
|
|
|
|
|
|
63 |
)
|
64 |
-
wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU")
|
65 |
-
wikicorpus_e_to_j_comet_wmt22 = Task("scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22")
|
66 |
wikicorpus_j_to_e_bert_score_en_f1 = Task(
|
67 |
-
"scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score"
|
|
|
|
|
|
|
|
|
68 |
)
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
xlsum_ja_rouge1 = Task("scores", "xlsum_ja_rouge1", "XL-Sum ROUGE1")
|
74 |
-
xlsum_ja_rouge2 = Task("scores", "xlsum_ja_rouge2", "XL-Sum ROUGE2")
|
75 |
# xlsum_ja_rouge2_scaling = Task("scores", "xlsum_ja_rouge2_scaling", "XL-Sum JA ROUGE2 Scaling")
|
76 |
-
xlsum_ja_rougeLsum = Task("scores", "xlsum_ja_rougeLsum", "XL-Sum ROUGE-Lsum")
|
77 |
|
78 |
|
79 |
NUM_FEWSHOT = 0 # Change with your few shot
|
|
|
2 |
from enum import Enum
|
3 |
|
4 |
|
5 |
+
class TaskType(Enum):
|
6 |
+
ALL = "ALL"
|
7 |
+
NLI = "NLI"
|
8 |
+
QA = "QA"
|
9 |
+
RC = "RC"
|
10 |
+
MC = "MC"
|
11 |
+
EL = "EL"
|
12 |
+
FA = "FA"
|
13 |
+
MR = "MR"
|
14 |
+
MT = "MT"
|
15 |
+
STS = "STS"
|
16 |
+
HE = "HE"
|
17 |
+
CG = "CG"
|
18 |
+
SUM = "SUM"
|
19 |
+
NotTask = "?"
|
20 |
+
|
21 |
+
|
22 |
@dataclass
|
23 |
class Task:
|
24 |
benchmark: str
|
25 |
metric: str
|
26 |
col_name: str
|
27 |
+
task_type: TaskType
|
28 |
+
average: bool = False
|
29 |
|
30 |
|
31 |
# Select your tasks here
|
32 |
# ---------------------------------------------------
|
33 |
class Tasks(Enum):
|
34 |
+
AVG = Task("scores", "AVG", "AVG", TaskType.ALL, True)
|
35 |
+
CG = Task("scores", "CG", "CG - コード生成", TaskType.CG, True) # Code Generation - コード生成
|
36 |
+
EL = Task(
|
37 |
+
"scores", "EL", "EL - エンティティリンキング", TaskType.EL, True
|
38 |
+
) # Entity Linking - エンティティリンキング
|
39 |
+
FA = Task("scores", "FA", "FA - 基礎分析", TaskType.FA, True) # Fundamental Analysis - 基礎分析
|
40 |
+
HE = Task("scores", "HE", "HE - 言語理解", TaskType.HE, True) # Human Examination - 言語理解
|
41 |
+
MC = Task(
|
42 |
+
"scores", "MC", "MC - 多肢選択式質問応答", TaskType.MC, True
|
43 |
+
) # Multiple Choice question answering - 多肢選択式質問応答
|
44 |
+
MR = Task("scores", "MR", "MR - 数学的推論", TaskType.MR, True) # Mathematical Reasoning - 数学的推論
|
45 |
+
MT = Task("scores", "MT", "MT - 機械翻訳", TaskType.MT, True) # Machine Translation - 機械翻訳
|
46 |
+
NLI = Task("scores", "NLI", "NLI - 自然言語推論", TaskType.NLI, True) # Natural Language Inference - 自然言語推論
|
47 |
+
QA = Task("scores", "QA", "QA - 質問応答", TaskType.QA, True) # Question Answering - 質問応答
|
48 |
+
RC = Task("scores", "RC", "RC - 読解力", TaskType.RC, True) # Reading Comprehension - 読解力
|
49 |
+
SUM = Task("scores", "SUM", "SUM - 要約", TaskType.SUM, True) # Summarization - 要約
|
50 |
+
alt_e_to_j_bert_score_ja_f1 = Task("scores", "alt-e-to-j_bert_score_ja_f1", "ALT E to J BERT Score", TaskType.MT)
|
51 |
+
alt_e_to_j_bleu_ja = Task("scores", "alt-e-to-j_bleu_ja", "ALT E to J BLEU", TaskType.MT)
|
52 |
+
alt_e_to_j_comet_wmt22 = Task("scores", "alt-e-to-j_comet_wmt22", "ALT E to J COMET WMT22", TaskType.MT)
|
53 |
+
alt_j_to_e_bert_score_en_f1 = Task("scores", "alt-j-to-e_bert_score_en_f1", "ALT J to E BERT Score", TaskType.MT)
|
54 |
+
alt_j_to_e_bleu_en = Task("scores", "alt-j-to-e_bleu_en", "ALT J to E BLEU", TaskType.MT)
|
55 |
+
alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22", TaskType.MT)
|
56 |
+
chabsa_set_f1 = Task("scores", "chabsa_set_f1", "ChABSA", TaskType.EL)
|
57 |
+
commonsensemoralja_exact_match = Task(
|
58 |
+
"scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA", TaskType.MC
|
59 |
+
)
|
60 |
+
jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP", TaskType.NLI)
|
61 |
+
janli_exact_match = Task("scores", "janli_exact_match", "JANLI", TaskType.NLI)
|
62 |
+
jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA", TaskType.MC)
|
63 |
+
jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA", TaskType.QA)
|
64 |
+
jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU", TaskType.HE)
|
65 |
+
jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI", TaskType.NLI)
|
66 |
+
jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM", TaskType.NLI)
|
67 |
+
jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK", TaskType.NLI)
|
68 |
+
jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad", TaskType.RC)
|
69 |
jsts_pearson = Task(
|
70 |
+
"scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度", TaskType.STS
|
71 |
) # Semantic Textual Similarity - 意味的類似度
|
72 |
jsts_spearman = Task(
|
73 |
+
"scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度", TaskType.STS
|
74 |
) # Semantic Textual Similarity - 意味的類似度
|
75 |
+
kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI", TaskType.MC)
|
76 |
+
mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS", TaskType.MR)
|
77 |
+
mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP(exec)", TaskType.CG)
|
78 |
+
mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP(pylint)", TaskType.CG)
|
79 |
+
mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU", TaskType.HE)
|
80 |
+
niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC", TaskType.QA)
|
81 |
+
wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference", TaskType.FA)
|
82 |
+
wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency", TaskType.FA)
|
83 |
+
wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER", TaskType.FA)
|
84 |
+
wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS", TaskType.FA)
|
85 |
+
wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading", TaskType.FA)
|
86 |
wikicorpus_e_to_j_bert_score_ja_f1 = Task(
|
87 |
+
"scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score", TaskType.MT
|
88 |
+
)
|
89 |
+
wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU", TaskType.MT)
|
90 |
+
wikicorpus_e_to_j_comet_wmt22 = Task(
|
91 |
+
"scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22", TaskType.MT
|
92 |
)
|
|
|
|
|
93 |
wikicorpus_j_to_e_bert_score_en_f1 = Task(
|
94 |
+
"scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score", TaskType.MT
|
95 |
+
)
|
96 |
+
wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU", TaskType.MT)
|
97 |
+
wikicorpus_j_to_e_comet_wmt22 = Task(
|
98 |
+
"scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22", TaskType.MT
|
99 |
)
|
100 |
+
xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score", TaskType.SUM)
|
101 |
+
xlsum_ja_bleu_ja = Task("scores", "xlsum_ja_bleu_ja", "XL-Sum JA BLEU", TaskType.SUM)
|
102 |
+
xlsum_ja_rouge1 = Task("scores", "xlsum_ja_rouge1", "XL-Sum ROUGE1", TaskType.SUM)
|
103 |
+
xlsum_ja_rouge2 = Task("scores", "xlsum_ja_rouge2", "XL-Sum ROUGE2", TaskType.SUM)
|
|
|
|
|
104 |
# xlsum_ja_rouge2_scaling = Task("scores", "xlsum_ja_rouge2_scaling", "XL-Sum JA ROUGE2 Scaling")
|
105 |
+
xlsum_ja_rougeLsum = Task("scores", "xlsum_ja_rougeLsum", "XL-Sum ROUGE-Lsum", TaskType.SUM)
|
106 |
|
107 |
|
108 |
NUM_FEWSHOT = 0 # Change with your few shot
|
src/display/utils.py
CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
|
|
3 |
|
4 |
import pandas as pd
|
5 |
|
6 |
-
from src.about import Tasks
|
7 |
|
8 |
|
9 |
def fields(raw_class):
|
@@ -21,6 +21,8 @@ class ColumnContent:
|
|
21 |
hidden: bool = False
|
22 |
never_hidden: bool = False
|
23 |
dummy: bool = False
|
|
|
|
|
24 |
|
25 |
|
26 |
## Leaderboard columns
|
@@ -31,7 +33,15 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
31 |
# Scores
|
32 |
# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
33 |
for task in Tasks:
|
34 |
-
auto_eval_column_dict.append(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
# Model information
|
36 |
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
37 |
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
|
|
3 |
|
4 |
import pandas as pd
|
5 |
|
6 |
+
from src.about import Tasks, TaskType
|
7 |
|
8 |
|
9 |
def fields(raw_class):
|
|
|
21 |
hidden: bool = False
|
22 |
never_hidden: bool = False
|
23 |
dummy: bool = False
|
24 |
+
task_type: TaskType = TaskType.NotTask
|
25 |
+
average: bool = False
|
26 |
|
27 |
|
28 |
## Leaderboard columns
|
|
|
33 |
# Scores
|
34 |
# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
35 |
for task in Tasks:
|
36 |
+
auto_eval_column_dict.append(
|
37 |
+
[
|
38 |
+
task.name,
|
39 |
+
ColumnContent,
|
40 |
+
ColumnContent(
|
41 |
+
task.value.col_name, "number", True, task_type=task.value.task_type, average=task.value.average
|
42 |
+
),
|
43 |
+
]
|
44 |
+
)
|
45 |
# Model information
|
46 |
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
47 |
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|