Muennighoff
committed on
Commit
•
cd12b7e
1
Parent(s):
be9a366
Add code
Browse files- eval_ru.sh +792 -0
- launch.sh +53 -0
- sbatch_mtf_4b_ru.sh +147 -0
- train_ru.txt +1 -0
eval_ru.sh
ADDED
@@ -0,0 +1,792 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --job-name=evaluate_t0
|
3 |
+
#SBATCH --nodes=1
|
4 |
+
#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
|
5 |
+
#SBATCH --cpus-per-task=32 # number of cores per tasks
|
6 |
+
#SBATCH --hint=nomultithread # we get physical cores not logical
|
7 |
+
#SBATCH --gpus-per-node=mi250:1
|
8 |
+
#SBATCH --time 5:00:00 # maximum execution time (HH:MM:SS)
|
9 |
+
#SBATCH --output=logs/%j.out # output file name
|
10 |
+
#SBATCH -e logs/%j.err
|
11 |
+
#SBATCH --account=project_462000119
|
12 |
+
#SBATCH -p pilot
|
13 |
+
#SBATCH --exclusive=user
|
14 |
+
|
15 |
+
# VALIDATION:
|
16 |
+
# --array=0-152  (DATASETS_AND_CONFIGS_VAL has 153 entries)
|
17 |
+
|
18 |
+
# L1
|
19 |
+
# --array=0-169
|
20 |
+
|
21 |
+
# L2
|
22 |
+
# --array=0-79  (DATASETS_AND_CONFIGS_L2 has 80 entries)
|
23 |
+
|
24 |
+
# MT L1
|
25 |
+
# --array=0-69
|
26 |
+
|
27 |
+
# MT L2
|
28 |
+
# --array=0-89
|
29 |
+
|
30 |
+
# XNLIMTHT:
|
31 |
+
# --array=0-79
|
32 |
+
|
33 |
+
|
34 |
+
# Trace every command (-x) and abort on the first failing command (-e).
set -ex
|
35 |
+
|
36 |
+
# source $six_ALL_CCFRWORK/start-py38-pt111
|
37 |
+
# conda activate thomas_t_zero_evaluation
|
38 |
+
# Activate the virtual environment that provides the evaluation dependencies.
source "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/bin/activate"
|
39 |
+
|
40 |
+
# CHECKPOINT_PATH=/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3/bloom-6b3
|
41 |
+
# CHECKPOINT_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/global_step1000
|
42 |
+
# CHECKPOINT_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/bloomz7b1
|
43 |
+
# Candidate checkpoints. Each assignment overwrites the previous one, so only
# the last value (bloomz-7b1-4b-ru) is in effect after this group; reorder or
# comment out lines to evaluate a different checkpoint.
# The values have no leading slash, so paths derived from them are resolved
# relative to the directory the job runs in.
CHECKPOINT_PATH=bloomz-560m
|
44 |
+
CHECKPOINT_PATH=bloomz-7b1-500m-ru
|
45 |
+
CHECKPOINT_PATH=bloomz-7b1-100m-ru
|
46 |
+
CHECKPOINT_PATH=bloomz-7b1-xp3ru
|
47 |
+
CHECKPOINT_PATH=bloomz-7b1-4b-ru
|
48 |
+
# WORKDIR=/gpfswork/rech/six/commun/code/tr13f-6B3-ml-t0
|
49 |
+
|
50 |
+
# pushd $WORKDIR
|
51 |
+
|
52 |
+
# Ask the Hugging Face libraries to work offline (local files/caches only) and
# point the datasets cache at the project scratch area.
export TRANSFORMERS_OFFLINE=1
|
53 |
+
# NOTE(review): HF_TRANSFORMERS_OFFLINE is not a variable name I can confirm
# any library reads; presumably harmless alongside TRANSFORMERS_OFFLINE - verify.
export HF_TRANSFORMERS_OFFLINE=1
|
54 |
+
export HF_DATASETS_OFFLINE=1
|
55 |
+
export HF_DATASETS_CACHE=/scratch/project_462000119/ds_cache
|
56 |
+
|
57 |
+
# Results are written to an "evaluation" directory under the checkpoint path.
OUTPUT_DIR="${CHECKPOINT_PATH}/evaluation"
|
58 |
+
# Create the output directory (and any missing parents). The expansion is
# quoted so a path containing spaces or glob characters stays one argument,
# and "--" keeps a path starting with "-" from being parsed as an option.
mkdir -p -- "$OUTPUT_DIR"
|
59 |
+
|
60 |
+
# Validation
|
61 |
+
# Validation prompt list: one array element per line, each a comma-separated
# record. The double quotes keep prompt names that contain spaces or shell
# metacharacters (e.g. "Classify intent (choices first)") inside a single
# element; the quotes themselves are removed by the shell.
# NOTE(review): the field meanings (dataset, dataset config, a third field that
# is None or a language code, prompt name, split) are presumed from the values;
# the code that consumes this array is not visible here - confirm against it.
DATASETS_AND_CONFIGS_VAL=(
|
62 |
+
head_qa,en,en,"multiple_choice_q_and_a_index_with_context_en",validation
|
63 |
+
head_qa,en,en,"multiple_choice_q_and_a_en",validation
|
64 |
+
head_qa,en,en,"multiple_choice_q_and_a_index_en",validation
|
65 |
+
head_qa,en,en,"multiple_choice_a_and_q_with_context_en",validation
|
66 |
+
head_qa,en,en,"multiple_choice_a_and_q_en",validation
|
67 |
+
head_qa,es,en,"multiple_choice_q_and_a_index_with_context_en",validation
|
68 |
+
head_qa,es,en,"multiple_choice_q_and_a_en",validation
|
69 |
+
head_qa,es,en,"multiple_choice_q_and_a_index_en",validation
|
70 |
+
head_qa,es,en,"multiple_choice_a_and_q_with_context_en",validation
|
71 |
+
head_qa,es,en,"multiple_choice_a_and_q_en",validation
|
72 |
+
climate_fever,None,None,"first_evidence_and_claim_itemization",test
|
73 |
+
climate_fever,None,None,"claim_and_all_supporting_evidences",test
|
74 |
+
climate_fever,None,None,"fifth_evidence_and_claim_itemization",test
|
75 |
+
climate_fever,None,None,"third_evidence_claim_pair",test
|
76 |
+
climate_fever,None,None,"second_evidence_and_claim_itemization",test
|
77 |
+
codah,codah,None,"interrogative_instruction_after_sentence_and_choices",train
|
78 |
+
codah,codah,None,"affirmative_instruction_before_sentence_and_choices",train
|
79 |
+
codah,codah,None,"affirmative_instruction_after_sentence_and_choices",train
|
80 |
+
aqua_rat,raw,None,"select_the_best_option",validation
|
81 |
+
aqua_rat,raw,None,"answer_quiz",validation
|
82 |
+
aqua_rat,raw,None,"Answer questions from options",validation
|
83 |
+
commonsense_qa,None,None,"answer_given_question_without_options",validation
|
84 |
+
commonsense_qa,None,None,"question_answering",validation
|
85 |
+
commonsense_qa,None,None,"most_suitable_answer",validation
|
86 |
+
amazon_reviews_multi,en,en,"prompt_title_to_star",validation
|
87 |
+
amazon_reviews_multi,en,en,"prompt_review_to_star",validation
|
88 |
+
amazon_reviews_multi,en,en,"prompt_body_title_to_star",validation
|
89 |
+
amazon_reviews_multi,zh,en,"prompt_title_to_star",validation
|
90 |
+
amazon_reviews_multi,zh,en,"prompt_review_to_star",validation
|
91 |
+
amazon_reviews_multi,zh,en,"prompt_body_title_to_star",validation
|
92 |
+
amazon_reviews_multi,fr,en,"prompt_title_to_star",validation
|
93 |
+
amazon_reviews_multi,fr,en,"prompt_review_to_star",validation
|
94 |
+
amazon_reviews_multi,fr,en,"prompt_body_title_to_star",validation
|
95 |
+
amazon_reviews_multi,es,en,"prompt_title_to_star",validation
|
96 |
+
amazon_reviews_multi,es,en,"prompt_review_to_star",validation
|
97 |
+
amazon_reviews_multi,es,en,"prompt_body_title_to_star",validation
|
98 |
+
art,None,None,"choose_hypothesis_options",validation
|
99 |
+
art,None,None,"choose_hypothesis_believable",validation
|
100 |
+
art,None,None,"choose_hypothesis",validation
|
101 |
+
art,None,None,"choose_hypothesis_desc",validation
|
102 |
+
art,None,None,"choose_hypothesis_likely",validation
|
103 |
+
banking77,None,None,"help_page_topic",test
|
104 |
+
banking77,None,None,"direct_to_which_department",test
|
105 |
+
banking77,None,None,"rephrase_as_banking_term",test
|
106 |
+
blbooksgenre,title_genre_classifiction,None,"multi-choice",train
|
107 |
+
blbooksgenre,title_genre_classifiction,None,"premise_context_first",train
|
108 |
+
blbooksgenre,title_genre_classifiction,None,"classify",train
|
109 |
+
blimp,adjunct_island,None,"grammatical_between_1_2",train
|
110 |
+
blimp,adjunct_island,None,"grammatical_between_A_B",train
|
111 |
+
blimp,adjunct_island,None,"grammatical_which_one_1_2",train
|
112 |
+
blimp,adjunct_island,None,"single_sentence_bad_yes_no",train
|
113 |
+
blimp,adjunct_island,None,"single_sentence_good_yes_no",train
|
114 |
+
conv_ai_3,None,None,"clarification_needed",validation
|
115 |
+
conv_ai_3,None,None,"score_give_number",validation
|
116 |
+
conv_ai_3,None,None,"ambiguous",validation
|
117 |
+
conv_ai_3,None,None,"directly_answer",validation
|
118 |
+
conv_ai_3,None,None,"score_how_much",validation
|
119 |
+
craigslist_bargains,None,None,"good deal for seller no list price implicit",validation
|
120 |
+
craigslist_bargains,None,None,"good deal for seller no list price",validation
|
121 |
+
craigslist_bargains,None,None,"good deal for seller",validation
|
122 |
+
craigslist_bargains,None,None,"best deal",validation
|
123 |
+
ecthr_cases,alleged-violation-prediction,None,"implicit_advice_number",validation
|
124 |
+
ecthr_cases,alleged-violation-prediction,None,"ecthr_alleged_articles_declaration_at_end",validation
|
125 |
+
ecthr_cases,alleged-violation-prediction,None,"ecthr_alleged_articles_question_at_start",validation
|
126 |
+
ecthr_cases,alleged-violation-prediction,None,"implicit_judgment_paragraph",validation
|
127 |
+
ecthr_cases,alleged-violation-prediction,None,"confirm number of violated articles",validation
|
128 |
+
emo,None,None,"persons_describe",validation
|
129 |
+
emo,None,None,"final_message",validation
|
130 |
+
emo,None,None,"what_emotion_do_you_think",validation
|
131 |
+
emo,None,None,"emotional_state",validation
|
132 |
+
emo,None,None,"dialogue_between",validation
|
133 |
+
emotion,None,None,"choose_the_best_emotion_label",test
|
134 |
+
emotion,None,None,"reply_with_emoation_label",test
|
135 |
+
emotion,None,None,"answer_with_class_label",test
|
136 |
+
emotion,None,None,"answer_question_with_emotion_label",test
|
137 |
+
financial_phrasebank,sentences_allagree,None,"share_price_option",train
|
138 |
+
financial_phrasebank,sentences_allagree,None,"sentiment",train
|
139 |
+
financial_phrasebank,sentences_allagree,None,"word_comes_to_mind",train
|
140 |
+
financial_phrasebank,sentences_allagree,None,"complementary_industries",train
|
141 |
+
financial_phrasebank,sentences_allagree,None,"bullish_neutral_bearish",train
|
142 |
+
glue,cola,None,"Make sense yes no",validation
|
143 |
+
glue,cola,None,"is_this_correct",validation
|
144 |
+
glue,cola,None,"editing",validation
|
145 |
+
glue,cola,None,"Following sentence acceptable",validation
|
146 |
+
glue,cola,None,"Previous sentence acceptable",validation
|
147 |
+
glue,sst2,None,"positive negative after",validation
|
148 |
+
glue,sst2,None,"review",validation
|
149 |
+
glue,sst2,None,"said",validation
|
150 |
+
glue,sst2,None,"following positive negative",validation
|
151 |
+
glue,sst2,None,"happy or mad",validation
|
152 |
+
health_fact,None,None,"claim_veracity_classification_after_reading_I_believe",validation
|
153 |
+
health_fact,None,None,"claim_explanation_classification",validation
|
154 |
+
health_fact,None,None,"claim_veracity_classification_tell_me",validation
|
155 |
+
hlgd,None,None,"is_same_event_with_time_interrogative_related",validation
|
156 |
+
hlgd,None,None,"is_same_event_interrogative_talk",validation
|
157 |
+
hlgd,None,None,"is_same_event_with_time_interrogative_talk",validation
|
158 |
+
hlgd,None,None,"is_same_event_refer",validation
|
159 |
+
hlgd,None,None,"is_same_event_editor_asks",validation
|
160 |
+
hyperpartisan_news_detection,byarticle,None,"consider_does_it_follow_a_hyperpartisan_argumentation",train
|
161 |
+
hyperpartisan_news_detection,byarticle,None,"follows_hyperpartisan_argumentation",train
|
162 |
+
hyperpartisan_news_detection,byarticle,None,"consume_with_caution",train
|
163 |
+
hyperpartisan_news_detection,byarticle,None,"extreme_left_wing_or_right_wing",train
|
164 |
+
hyperpartisan_news_detection,byarticle,None,"consider_it_exhibits_extreme_one_sidedness",train
|
165 |
+
liar,None,None,"Given statement guess category",validation
|
166 |
+
lince,sa_spaeng,None,"original poster expressed sentiment",validation
|
167 |
+
lince,sa_spaeng,None,"sentiment trying to express",validation
|
168 |
+
lince,sa_spaeng,None,"express sentiment",validation
|
169 |
+
lince,sa_spaeng,None,"negation template",validation
|
170 |
+
lince,sa_spaeng,None,"the author seem",validation
|
171 |
+
math_qa,None,None,"choose_correct_og",test
|
172 |
+
math_qa,None,None,"pick_the_correct",test
|
173 |
+
math_qa,None,None,"first_choice_then_problem",test
|
174 |
+
math_qa,None,None,"problem_set_type",test
|
175 |
+
math_qa,None,None,"gre_problem",test
|
176 |
+
movie_rationales,None,None,"Standard binary sentiment analysis",validation
|
177 |
+
movie_rationales,None,None,"Evidences sentiment classification",validation
|
178 |
+
movie_rationales,None,None,"Evidences + review",validation
|
179 |
+
movie_rationales,None,None,"Generate evidences and sentiment",validation
|
180 |
+
mwsc,None,None,"in-the-sentence-question-first",validation
|
181 |
+
mwsc,None,None,"what-think",validation
|
182 |
+
mwsc,None,None,"in-the-sentence",validation
|
183 |
+
mwsc,None,None,"options-or",validation
|
184 |
+
mwsc,None,None,"is-correct",validation
|
185 |
+
poem_sentiment,None,None,"positive_or_negative_sentiment_variation_2",validation
|
186 |
+
poem_sentiment,None,None,"question_answer_format",validation
|
187 |
+
poem_sentiment,None,None,"guess_sentiment_without_options_variation_1",validation
|
188 |
+
poem_sentiment,None,None,"positive_or_negative_sentiment_variation_1",validation
|
189 |
+
poem_sentiment,None,None,"most_appropriate_sentiment",validation
|
190 |
+
onestop_english,None,None,"esl_context",train
|
191 |
+
onestop_english,None,None,"ara_context",train
|
192 |
+
onestop_english,None,None,"determine_reading_level_from_the_first_three_sentences",train
|
193 |
+
onestop_english,None,None,"esl_variation",train
|
194 |
+
onestop_english,None,None,"assess",train
|
195 |
+
pubmed_qa,pqa_labeled,None,"Long Answer to Final Decision",train
|
196 |
+
pubmed_qa,pqa_labeled,None,"Question Answering (Short)",train
|
197 |
+
riddle_sense,None,None,"most_suitable_answer",validation
|
198 |
+
riddle_sense,None,None,"answer_given_question_without_options",validation
|
199 |
+
riddle_sense,None,None,"question_to_answer_index",validation
|
200 |
+
riddle_sense,None,None,"question_answering",validation
|
201 |
+
scicite,None,None,"Classify intent w/section (select choice)",validation
|
202 |
+
scicite,None,None,"Classify intent (choices first)",validation
|
203 |
+
scicite,None,None,"Classify intent (select choice)",validation
|
204 |
+
scicite,None,None,"Classify intent",validation
|
205 |
+
scicite,None,None,"can_describe",validation
|
206 |
+
selqa,answer_selection_analysis,None,"is-he-talking-about",validation
|
207 |
+
selqa,answer_selection_analysis,None,"would-make-sense-qu-rand",validation
|
208 |
+
selqa,answer_selection_analysis,None,"make-sense-rand",validation
|
209 |
+
selqa,answer_selection_analysis,None,"which-answer-1st-vs-random",validation
|
210 |
+
snips_built_in_intents,None,None,"voice_intent",train
|
211 |
+
snips_built_in_intents,None,None,"categorize_query",train
|
212 |
+
snips_built_in_intents,None,None,"intent_query",train
|
213 |
+
snips_built_in_intents,None,None,"categorize_query_brief",train
|
214 |
+
snips_built_in_intents,None,None,"query_intent",train
|
215 |
+
)
|
216 |
+
|
217 |
+
# Five story_cloze (config "2016") prompts on the validation split. The same
# five records also appear in DATASETS_AND_CONFIGS_L1.
# NOTE(review): presumably a subset for re-running entries that were missed in
# an earlier L1 run - confirm; nothing visible here selects this array.
DATASETS_AND_CONFIGS_L1MISS=(
|
218 |
+
story_cloze,2016,None,"Story Continuation and Options",validation
|
219 |
+
story_cloze,2016,None,"Answer Given options",validation
|
220 |
+
story_cloze,2016,None,"Novel Correct Ending",validation
|
221 |
+
story_cloze,2016,None,"Generate Ending",validation
|
222 |
+
story_cloze,2016,None,"Choose Story Ending",validation
|
223 |
+
)
|
224 |
+
|
225 |
+
# "L1" prompt list: 170 comma-separated records, one array element per line,
# matching the "--array=0-169" hint near the top of the script.
# NOTE(review): the prompt name "C1 or C2? premise, so/because…" (copa/xcopa
# records) itself contains a comma. If the consumer splits each record on every
# comma, that name and the split field will be mis-parsed - verify against the
# parsing code, which is not visible here.
DATASETS_AND_CONFIGS_L1=(
|
226 |
+
super_glue,copa,None,"best_option",validation
|
227 |
+
super_glue,copa,None,"C1 or C2? premise, so/because…",validation
|
228 |
+
super_glue,copa,None,"i_am_hesitating",validation
|
229 |
+
super_glue,copa,None,"cause_effect",validation
|
230 |
+
super_glue,copa,None,"plausible_alternatives",validation
|
231 |
+
super_glue,rte,None,"MNLI crowdsource",validation
|
232 |
+
super_glue,rte,None,"GPT-3 style",validation
|
233 |
+
super_glue,rte,None,"does it follow that",validation
|
234 |
+
super_glue,rte,None,"should assume",validation
|
235 |
+
super_glue,rte,None,"guaranteed true",validation
|
236 |
+
anli,dev_r1,None,"guaranteed/possible/impossible",dev_r1
|
237 |
+
anli,dev_r1,None,"MNLI crowdsource",dev_r1
|
238 |
+
anli,dev_r1,None,"GPT-3 style",dev_r1
|
239 |
+
anli,dev_r1,None,"justified in saying",dev_r1
|
240 |
+
anli,dev_r1,None,"can we infer",dev_r1
|
241 |
+
anli,dev_r2,None,"guaranteed/possible/impossible",dev_r2
|
242 |
+
anli,dev_r2,None,"MNLI crowdsource",dev_r2
|
243 |
+
anli,dev_r2,None,"GPT-3 style",dev_r2
|
244 |
+
anli,dev_r2,None,"justified in saying",dev_r2
|
245 |
+
anli,dev_r2,None,"can we infer",dev_r2
|
246 |
+
anli,dev_r3,None,"guaranteed/possible/impossible",dev_r3
|
247 |
+
anli,dev_r3,None,"MNLI crowdsource",dev_r3
|
248 |
+
anli,dev_r3,None,"GPT-3 style",dev_r3
|
249 |
+
anli,dev_r3,None,"justified in saying",dev_r3
|
250 |
+
anli,dev_r3,None,"can we infer",dev_r3
|
251 |
+
super_glue,cb,None,"guaranteed/possible/impossible",validation
|
252 |
+
super_glue,cb,None,"MNLI crowdsource",validation
|
253 |
+
super_glue,cb,None,"GPT-3 style",validation
|
254 |
+
super_glue,cb,None,"justified in saying",validation
|
255 |
+
super_glue,cb,None,"can we infer",validation
|
256 |
+
winogrande,winogrande_xl,None,"underscore refer to",validation
|
257 |
+
winogrande,winogrande_xl,None,"Replace",validation
|
258 |
+
winogrande,winogrande_xl,None,"stand for",validation
|
259 |
+
winogrande,winogrande_xl,None,"does underscore refer to",validation
|
260 |
+
winogrande,winogrande_xl,None,"True or False",validation
|
261 |
+
story_cloze,2016,None,"Story Continuation and Options",validation
|
262 |
+
story_cloze,2016,None,"Answer Given options",validation
|
263 |
+
story_cloze,2016,None,"Novel Correct Ending",validation
|
264 |
+
story_cloze,2016,None,"Generate Ending",validation
|
265 |
+
story_cloze,2016,None,"Choose Story Ending",validation
|
266 |
+
Muennighoff/xstory_cloze,ar,en,"Story Continuation and Options",validation
|
267 |
+
Muennighoff/xstory_cloze,ar,en,"Answer Given options",validation
|
268 |
+
Muennighoff/xstory_cloze,ar,en,"Novel Correct Ending",validation
|
269 |
+
Muennighoff/xstory_cloze,ar,en,"Generate Ending",validation
|
270 |
+
Muennighoff/xstory_cloze,ar,en,"Choose Story Ending",validation
|
271 |
+
Muennighoff/xstory_cloze,es,en,"Story Continuation and Options",validation
|
272 |
+
Muennighoff/xstory_cloze,es,en,"Answer Given options",validation
|
273 |
+
Muennighoff/xstory_cloze,es,en,"Novel Correct Ending",validation
|
274 |
+
Muennighoff/xstory_cloze,es,en,"Generate Ending",validation
|
275 |
+
Muennighoff/xstory_cloze,es,en,"Choose Story Ending",validation
|
276 |
+
Muennighoff/xstory_cloze,eu,en,"Story Continuation and Options",validation
|
277 |
+
Muennighoff/xstory_cloze,eu,en,"Answer Given options",validation
|
278 |
+
Muennighoff/xstory_cloze,eu,en,"Novel Correct Ending",validation
|
279 |
+
Muennighoff/xstory_cloze,eu,en,"Generate Ending",validation
|
280 |
+
Muennighoff/xstory_cloze,eu,en,"Choose Story Ending",validation
|
281 |
+
Muennighoff/xstory_cloze,id,en,"Story Continuation and Options",validation
|
282 |
+
Muennighoff/xstory_cloze,id,en,"Answer Given options",validation
|
283 |
+
Muennighoff/xstory_cloze,id,en,"Novel Correct Ending",validation
|
284 |
+
Muennighoff/xstory_cloze,id,en,"Generate Ending",validation
|
285 |
+
Muennighoff/xstory_cloze,id,en,"Choose Story Ending",validation
|
286 |
+
Muennighoff/xstory_cloze,hi,en,"Story Continuation and Options",validation
|
287 |
+
Muennighoff/xstory_cloze,hi,en,"Answer Given options",validation
|
288 |
+
Muennighoff/xstory_cloze,hi,en,"Novel Correct Ending",validation
|
289 |
+
Muennighoff/xstory_cloze,hi,en,"Generate Ending",validation
|
290 |
+
Muennighoff/xstory_cloze,hi,en,"Choose Story Ending",validation
|
291 |
+
Muennighoff/xstory_cloze,sw,en,"Story Continuation and Options",validation
|
292 |
+
Muennighoff/xstory_cloze,sw,en,"Answer Given options",validation
|
293 |
+
Muennighoff/xstory_cloze,sw,en,"Novel Correct Ending",validation
|
294 |
+
Muennighoff/xstory_cloze,sw,en,"Generate Ending",validation
|
295 |
+
Muennighoff/xstory_cloze,sw,en,"Choose Story Ending",validation
|
296 |
+
Muennighoff/xstory_cloze,te,en,"Story Continuation and Options",validation
|
297 |
+
Muennighoff/xstory_cloze,te,en,"Answer Given options",validation
|
298 |
+
Muennighoff/xstory_cloze,te,en,"Novel Correct Ending",validation
|
299 |
+
Muennighoff/xstory_cloze,te,en,"Generate Ending",validation
|
300 |
+
Muennighoff/xstory_cloze,te,en,"Choose Story Ending",validation
|
301 |
+
Muennighoff/xstory_cloze,zh,en,"Story Continuation and Options",validation
|
302 |
+
Muennighoff/xstory_cloze,zh,en,"Answer Given options",validation
|
303 |
+
Muennighoff/xstory_cloze,zh,en,"Novel Correct Ending",validation
|
304 |
+
Muennighoff/xstory_cloze,zh,en,"Generate Ending",validation
|
305 |
+
Muennighoff/xstory_cloze,zh,en,"Choose Story Ending",validation
|
306 |
+
xnli,ar,en,"guaranteed/possible/impossible",validation
|
307 |
+
xnli,ar,en,"MNLI crowdsource",validation
|
308 |
+
xnli,ar,en,"GPT-3 style",validation
|
309 |
+
xnli,ar,en,"justified in saying",validation
|
310 |
+
xnli,ar,en,"can we infer",validation
|
311 |
+
xnli,en,en,"guaranteed/possible/impossible",validation
|
312 |
+
xnli,en,en,"MNLI crowdsource",validation
|
313 |
+
xnli,en,en,"GPT-3 style",validation
|
314 |
+
xnli,en,en,"justified in saying",validation
|
315 |
+
xnli,en,en,"can we infer",validation
|
316 |
+
xnli,es,en,"guaranteed/possible/impossible",validation
|
317 |
+
xnli,es,en,"MNLI crowdsource",validation
|
318 |
+
xnli,es,en,"GPT-3 style",validation
|
319 |
+
xnli,es,en,"justified in saying",validation
|
320 |
+
xnli,es,en,"can we infer",validation
|
321 |
+
xnli,fr,en,"guaranteed/possible/impossible",validation
|
322 |
+
xnli,fr,en,"MNLI crowdsource",validation
|
323 |
+
xnli,fr,en,"GPT-3 style",validation
|
324 |
+
xnli,fr,en,"justified in saying",validation
|
325 |
+
xnli,fr,en,"can we infer",validation
|
326 |
+
xnli,hi,en,"guaranteed/possible/impossible",validation
|
327 |
+
xnli,hi,en,"MNLI crowdsource",validation
|
328 |
+
xnli,hi,en,"GPT-3 style",validation
|
329 |
+
xnli,hi,en,"justified in saying",validation
|
330 |
+
xnli,hi,en,"can we infer",validation
|
331 |
+
xnli,sw,en,"guaranteed/possible/impossible",validation
|
332 |
+
xnli,sw,en,"MNLI crowdsource",validation
|
333 |
+
xnli,sw,en,"GPT-3 style",validation
|
334 |
+
xnli,sw,en,"justified in saying",validation
|
335 |
+
xnli,sw,en,"can we infer",validation
|
336 |
+
xnli,ur,en,"guaranteed/possible/impossible",validation
|
337 |
+
xnli,ur,en,"MNLI crowdsource",validation
|
338 |
+
xnli,ur,en,"GPT-3 style",validation
|
339 |
+
xnli,ur,en,"justified in saying",validation
|
340 |
+
xnli,ur,en,"can we infer",validation
|
341 |
+
xnli,vi,en,"guaranteed/possible/impossible",validation
|
342 |
+
xnli,vi,en,"MNLI crowdsource",validation
|
343 |
+
xnli,vi,en,"GPT-3 style",validation
|
344 |
+
xnli,vi,en,"justified in saying",validation
|
345 |
+
xnli,vi,en,"can we infer",validation
|
346 |
+
xnli,zh,en,"guaranteed/possible/impossible",validation
|
347 |
+
xnli,zh,en,"MNLI crowdsource",validation
|
348 |
+
xnli,zh,en,"GPT-3 style",validation
|
349 |
+
xnli,zh,en,"justified in saying",validation
|
350 |
+
xnli,zh,en,"can we infer",validation
|
351 |
+
xcopa,id,en,"best_option",validation
|
352 |
+
xcopa,id,en,"C1 or C2? premise, so/because…",validation
|
353 |
+
xcopa,id,en,"i_am_hesitating",validation
|
354 |
+
xcopa,id,en,"cause_effect",validation
|
355 |
+
xcopa,id,en,"plausible_alternatives",validation
|
356 |
+
xcopa,sw,en,"best_option",validation
|
357 |
+
xcopa,sw,en,"C1 or C2? premise, so/because…",validation
|
358 |
+
xcopa,sw,en,"i_am_hesitating",validation
|
359 |
+
xcopa,sw,en,"cause_effect",validation
|
360 |
+
xcopa,sw,en,"plausible_alternatives",validation
|
361 |
+
xcopa,ta,en,"best_option",validation
|
362 |
+
xcopa,ta,en,"C1 or C2? premise, so/because…",validation
|
363 |
+
xcopa,ta,en,"i_am_hesitating",validation
|
364 |
+
xcopa,ta,en,"cause_effect",validation
|
365 |
+
xcopa,ta,en,"plausible_alternatives",validation
|
366 |
+
xcopa,vi,en,"best_option",validation
|
367 |
+
xcopa,vi,en,"C1 or C2? premise, so/because…",validation
|
368 |
+
xcopa,vi,en,"i_am_hesitating",validation
|
369 |
+
xcopa,vi,en,"cause_effect",validation
|
370 |
+
xcopa,vi,en,"plausible_alternatives",validation
|
371 |
+
xcopa,zh,en,"best_option",validation
|
372 |
+
xcopa,zh,en,"C1 or C2? premise, so/because…",validation
|
373 |
+
xcopa,zh,en,"i_am_hesitating",validation
|
374 |
+
xcopa,zh,en,"cause_effect",validation
|
375 |
+
xcopa,zh,en,"plausible_alternatives",validation
|
376 |
+
Muennighoff/xwinograd,en,en,"underscore refer to",test
|
377 |
+
Muennighoff/xwinograd,en,en,"Replace",test
|
378 |
+
Muennighoff/xwinograd,en,en,"stand for",test
|
379 |
+
Muennighoff/xwinograd,en,en,"does underscore refer to",test
|
380 |
+
Muennighoff/xwinograd,en,en,"True or False",test
|
381 |
+
Muennighoff/xwinograd,fr,en,"underscore refer to",test
|
382 |
+
Muennighoff/xwinograd,fr,en,"Replace",test
|
383 |
+
Muennighoff/xwinograd,fr,en,"stand for",test
|
384 |
+
Muennighoff/xwinograd,fr,en,"does underscore refer to",test
|
385 |
+
Muennighoff/xwinograd,fr,en,"True or False",test
|
386 |
+
Muennighoff/xwinograd,pt,en,"underscore refer to",test
|
387 |
+
Muennighoff/xwinograd,pt,en,"Replace",test
|
388 |
+
Muennighoff/xwinograd,pt,en,"stand for",test
|
389 |
+
Muennighoff/xwinograd,pt,en,"does underscore refer to",test
|
390 |
+
Muennighoff/xwinograd,pt,en,"True or False",test
|
391 |
+
Muennighoff/xwinograd,zh,en,"underscore refer to",test
|
392 |
+
Muennighoff/xwinograd,zh,en,"Replace",test
|
393 |
+
Muennighoff/xwinograd,zh,en,"stand for",test
|
394 |
+
Muennighoff/xwinograd,zh,en,"does underscore refer to",test
|
395 |
+
Muennighoff/xwinograd,zh,en,"True or False",test
|
396 |
+
)
|
397 |
+
|
398 |
+
# "L2" prompt list: 80 comma-separated records, one array element per line
# (xstory_cloze, xnli, xwinograd and xcopa configs).
# NOTE(review): the xcopa prompt name "C1 or C2? premise, so/because…" contains
# a comma. If the consumer splits each record on every comma, that name and the
# split field will be mis-parsed - verify against the parsing code, which is
# not visible here.
DATASETS_AND_CONFIGS_L2=(
|
399 |
+
Muennighoff/xstory_cloze,ru,en,"Story Continuation and Options",validation
|
400 |
+
Muennighoff/xstory_cloze,ru,en,"Answer Given options",validation
|
401 |
+
Muennighoff/xstory_cloze,ru,en,"Novel Correct Ending",validation
|
402 |
+
Muennighoff/xstory_cloze,ru,en,"Generate Ending",validation
|
403 |
+
Muennighoff/xstory_cloze,ru,en,"Choose Story Ending",validation
|
404 |
+
Muennighoff/xstory_cloze,my,en,"Story Continuation and Options",validation
|
405 |
+
Muennighoff/xstory_cloze,my,en,"Answer Given options",validation
|
406 |
+
Muennighoff/xstory_cloze,my,en,"Novel Correct Ending",validation
|
407 |
+
Muennighoff/xstory_cloze,my,en,"Generate Ending",validation
|
408 |
+
Muennighoff/xstory_cloze,my,en,"Choose Story Ending",validation
|
409 |
+
xnli,bg,en,"guaranteed/possible/impossible",validation
|
410 |
+
xnli,bg,en,"MNLI crowdsource",validation
|
411 |
+
xnli,bg,en,"GPT-3 style",validation
|
412 |
+
xnli,bg,en,"justified in saying",validation
|
413 |
+
xnli,bg,en,"can we infer",validation
|
414 |
+
xnli,de,en,"guaranteed/possible/impossible",validation
|
415 |
+
xnli,de,en,"MNLI crowdsource",validation
|
416 |
+
xnli,de,en,"GPT-3 style",validation
|
417 |
+
xnli,de,en,"justified in saying",validation
|
418 |
+
xnli,de,en,"can we infer",validation
|
419 |
+
xnli,el,en,"guaranteed/possible/impossible",validation
|
420 |
+
xnli,el,en,"MNLI crowdsource",validation
|
421 |
+
xnli,el,en,"GPT-3 style",validation
|
422 |
+
xnli,el,en,"justified in saying",validation
|
423 |
+
xnli,el,en,"can we infer",validation
|
424 |
+
xnli,ru,en,"guaranteed/possible/impossible",validation
|
425 |
+
xnli,ru,en,"MNLI crowdsource",validation
|
426 |
+
xnli,ru,en,"GPT-3 style",validation
|
427 |
+
xnli,ru,en,"justified in saying",validation
|
428 |
+
xnli,ru,en,"can we infer",validation
|
429 |
+
xnli,th,en,"guaranteed/possible/impossible",validation
|
430 |
+
xnli,th,en,"MNLI crowdsource",validation
|
431 |
+
xnli,th,en,"GPT-3 style",validation
|
432 |
+
xnli,th,en,"justified in saying",validation
|
433 |
+
xnli,th,en,"can we infer",validation
|
434 |
+
xnli,tr,en,"guaranteed/possible/impossible",validation
|
435 |
+
xnli,tr,en,"MNLI crowdsource",validation
|
436 |
+
xnli,tr,en,"GPT-3 style",validation
|
437 |
+
xnli,tr,en,"justified in saying",validation
|
438 |
+
xnli,tr,en,"can we infer",validation
|
439 |
+
Muennighoff/xwinograd,ru,en,"underscore refer to",test
|
440 |
+
Muennighoff/xwinograd,ru,en,"Replace",test
|
441 |
+
Muennighoff/xwinograd,ru,en,"stand for",test
|
442 |
+
Muennighoff/xwinograd,ru,en,"does underscore refer to",test
|
443 |
+
Muennighoff/xwinograd,ru,en,"True or False",test
|
444 |
+
Muennighoff/xwinograd,jp,en,"underscore refer to",test
|
445 |
+
Muennighoff/xwinograd,jp,en,"Replace",test
|
446 |
+
Muennighoff/xwinograd,jp,en,"stand for",test
|
447 |
+
Muennighoff/xwinograd,jp,en,"does underscore refer to",test
|
448 |
+
Muennighoff/xwinograd,jp,en,"True or False",test
|
449 |
+
xcopa,et,en,"best_option",validation
|
450 |
+
xcopa,et,en,"C1 or C2? premise, so/because…",validation
|
451 |
+
xcopa,et,en,"i_am_hesitating",validation
|
452 |
+
xcopa,et,en,"cause_effect",validation
|
453 |
+
xcopa,et,en,"plausible_alternatives",validation
|
454 |
+
xcopa,ht,en,"best_option",validation
|
455 |
+
xcopa,ht,en,"C1 or C2? premise, so/because…",validation
|
456 |
+
xcopa,ht,en,"i_am_hesitating",validation
|
457 |
+
xcopa,ht,en,"cause_effect",validation
|
458 |
+
xcopa,ht,en,"plausible_alternatives",validation
|
459 |
+
xcopa,it,en,"best_option",validation
|
460 |
+
xcopa,it,en,"C1 or C2? premise, so/because…",validation
|
461 |
+
xcopa,it,en,"i_am_hesitating",validation
|
462 |
+
xcopa,it,en,"cause_effect",validation
|
463 |
+
xcopa,it,en,"plausible_alternatives",validation
|
464 |
+
xcopa,qu,en,"best_option",validation
|
465 |
+
xcopa,qu,en,"C1 or C2? premise, so/because…",validation
|
466 |
+
xcopa,qu,en,"i_am_hesitating",validation
|
467 |
+
xcopa,qu,en,"cause_effect",validation
|
468 |
+
xcopa,qu,en,"plausible_alternatives",validation
|
469 |
+
xcopa,th,en,"best_option",validation
|
470 |
+
xcopa,th,en,"C1 or C2? premise, so/because…",validation
|
471 |
+
xcopa,th,en,"i_am_hesitating",validation
|
472 |
+
xcopa,th,en,"cause_effect",validation
|
473 |
+
xcopa,th,en,"plausible_alternatives",validation
|
474 |
+
xcopa,tr,en,"best_option",validation
|
475 |
+
xcopa,tr,en,"C1 or C2? premise, so/because…",validation
|
476 |
+
xcopa,tr,en,"i_am_hesitating",validation
|
477 |
+
xcopa,tr,en,"cause_effect",validation
|
478 |
+
xcopa,tr,en,"plausible_alternatives",validation
|
479 |
+
)
|
480 |
+
|
481 |
+
DATASETS_AND_CONFIGS_MT_L1=(
|
482 |
+
Muennighoff/xstory_cloze,ar,ar,"Story Continuation and Options_armt",validation
|
483 |
+
Muennighoff/xstory_cloze,ar,ar,"Answer Given options_armt",validation
|
484 |
+
Muennighoff/xstory_cloze,ar,ar,"Novel Correct Ending_armt",validation
|
485 |
+
Muennighoff/xstory_cloze,ar,ar,"Generate Ending_armt",validation
|
486 |
+
Muennighoff/xstory_cloze,ar,ar,"Choose Story Ending_armt",validation
|
487 |
+
Muennighoff/xstory_cloze,es,es,"Story Continuation and Options_esmt",validation
|
488 |
+
Muennighoff/xstory_cloze,es,es,"Answer Given options_esmt",validation
|
489 |
+
Muennighoff/xstory_cloze,es,es,"Novel Correct Ending_esmt",validation
|
490 |
+
Muennighoff/xstory_cloze,es,es,"Generate Ending_esmt",validation
|
491 |
+
Muennighoff/xstory_cloze,es,es,"Choose Story Ending_esmt",validation
|
492 |
+
Muennighoff/xstory_cloze,eu,eu,"Story Continuation and Options_eumt",validation
|
493 |
+
Muennighoff/xstory_cloze,eu,eu,"Answer Given options_eumt",validation
|
494 |
+
Muennighoff/xstory_cloze,eu,eu,"Novel Correct Ending_eumt",validation
|
495 |
+
Muennighoff/xstory_cloze,eu,eu,"Generate Ending_eumt",validation
|
496 |
+
Muennighoff/xstory_cloze,eu,eu,"Choose Story Ending_eumt",validation
|
497 |
+
Muennighoff/xstory_cloze,id,id,"Story Continuation and Options_idmt",validation
|
498 |
+
Muennighoff/xstory_cloze,id,id,"Answer Given options_idmt",validation
|
499 |
+
Muennighoff/xstory_cloze,id,id,"Novel Correct Ending_idmt",validation
|
500 |
+
Muennighoff/xstory_cloze,id,id,"Generate Ending_idmt",validation
|
501 |
+
Muennighoff/xstory_cloze,id,id,"Choose Story Ending_idmt",validation
|
502 |
+
Muennighoff/xstory_cloze,hi,hi,"Story Continuation and Options_himt",validation
|
503 |
+
Muennighoff/xstory_cloze,hi,hi,"Answer Given options_himt",validation
|
504 |
+
Muennighoff/xstory_cloze,hi,hi,"Novel Correct Ending_himt",validation
|
505 |
+
Muennighoff/xstory_cloze,hi,hi,"Generate Ending_himt",validation
|
506 |
+
Muennighoff/xstory_cloze,hi,hi,"Choose Story Ending_himt",validation
|
507 |
+
Muennighoff/xstory_cloze,sw,sw,"Story Continuation and Options_swmt",validation
|
508 |
+
Muennighoff/xstory_cloze,sw,sw,"Answer Given options_swmt",validation
|
509 |
+
Muennighoff/xstory_cloze,sw,sw,"Novel Correct Ending_swmt",validation
|
510 |
+
Muennighoff/xstory_cloze,sw,sw,"Generate Ending_swmt",validation
|
511 |
+
Muennighoff/xstory_cloze,sw,sw,"Choose Story Ending_swmt",validation
|
512 |
+
Muennighoff/xstory_cloze,te,te,"Story Continuation and Options_temt",validation
|
513 |
+
Muennighoff/xstory_cloze,te,te,"Answer Given options_temt",validation
|
514 |
+
Muennighoff/xstory_cloze,te,te,"Novel Correct Ending_temt",validation
|
515 |
+
Muennighoff/xstory_cloze,te,te,"Generate Ending_temt",validation
|
516 |
+
Muennighoff/xstory_cloze,te,te,"Choose Story Ending_temt",validation
|
517 |
+
Muennighoff/xstory_cloze,zh,zh,"Story Continuation and Options_zhmt",validation
|
518 |
+
Muennighoff/xstory_cloze,zh,zh,"Answer Given options_zhmt",validation
|
519 |
+
Muennighoff/xstory_cloze,zh,zh,"Novel Correct Ending_zhmt",validation
|
520 |
+
Muennighoff/xstory_cloze,zh,zh,"Generate Ending_zhmt",validation
|
521 |
+
Muennighoff/xstory_cloze,zh,zh,"Choose Story Ending_zhmt",validation
|
522 |
+
Muennighoff/xwinograd,fr,fr,"underscore refer to_frmt",test
|
523 |
+
Muennighoff/xwinograd,fr,fr,"Replace_frmt",test
|
524 |
+
Muennighoff/xwinograd,fr,fr,"stand for_frmt",test
|
525 |
+
Muennighoff/xwinograd,fr,fr,"does underscore refer to_frmt",test
|
526 |
+
Muennighoff/xwinograd,fr,fr,"True or False_frmt",test
|
527 |
+
Muennighoff/xwinograd,pt,pt,"underscore refer to_ptmt",test
|
528 |
+
Muennighoff/xwinograd,pt,pt,"Replace_ptmt",test
|
529 |
+
Muennighoff/xwinograd,pt,pt,"stand for_ptmt",test
|
530 |
+
Muennighoff/xwinograd,pt,pt,"does underscore refer to_ptmt",test
|
531 |
+
Muennighoff/xwinograd,pt,pt,"True or False_ptmt",test
|
532 |
+
Muennighoff/xwinograd,zh,zh,"underscore refer to_zhmt",test
|
533 |
+
Muennighoff/xwinograd,zh,zh,"Replace_zhmt",test
|
534 |
+
Muennighoff/xwinograd,zh,zh,"stand for_zhmt",test
|
535 |
+
Muennighoff/xwinograd,zh,zh,"does underscore refer to_zhmt",test
|
536 |
+
Muennighoff/xwinograd,zh,zh,"True or False_zhmt",test
|
537 |
+
xcopa,id,id,"best_option_idmt",validation
|
538 |
+
xcopa,id,id,"C1 or C2? premise_idmt",validation
|
539 |
+
xcopa,id,id,"i_am_hesitating_idmt",validation
|
540 |
+
xcopa,id,id,"cause_effect_idmt",validation
|
541 |
+
xcopa,id,id,"plausible_alternatives_idmt",validation
|
542 |
+
xcopa,sw,sw,"best_option_swmt",validation
|
543 |
+
xcopa,sw,sw,"C1 or C2? premise_swmt",validation
|
544 |
+
xcopa,sw,sw,"i_am_hesitating_swmt",validation
|
545 |
+
xcopa,sw,sw,"cause_effect_swmt",validation
|
546 |
+
xcopa,sw,sw,"plausible_alternatives_swmt",validation
|
547 |
+
xcopa,ta,ta,"best_option_tamt",validation
|
548 |
+
xcopa,ta,ta,"C1 or C2? premise_tamt",validation
|
549 |
+
xcopa,ta,ta,"i_am_hesitating_tamt",validation
|
550 |
+
xcopa,ta,ta,"cause_effect_tamt",validation
|
551 |
+
xcopa,ta,ta,"plausible_alternatives_tamt",validation
|
552 |
+
xcopa,vi,vi,"best_option_vimt",validation
|
553 |
+
xcopa,vi,vi,"C1 or C2? premise_vimt",validation
|
554 |
+
xcopa,vi,vi,"i_am_hesitating_vimt",validation
|
555 |
+
xcopa,vi,vi,"cause_effect_vimt",validation
|
556 |
+
xcopa,vi,vi,"plausible_alternatives_vimt",validation
|
557 |
+
xcopa,zh,zh,"best_option_zhmt",validation
|
558 |
+
xcopa,zh,zh,"C1 or C2? premise_zhmt",validation
|
559 |
+
xcopa,zh,zh,"i_am_hesitating_zhmt",validation
|
560 |
+
xcopa,zh,zh,"cause_effect_zhmt",validation
|
561 |
+
xcopa,zh,zh,"plausible_alternatives_zhmt",validation
|
562 |
+
)
|
563 |
+
|
564 |
+
# Evaluation entries for Chinese (zh) using the "_zhht" prompt variants:
# five prompts each for xstory_cloze, xwinograd and xcopa.
# Entry format (split on "," by the `read` near the end of this script):
#   dataset_name,dataset_config_name,template_config_name,template_name,split
# NOTE(review): "_zhht" presumably marks human-translated Chinese prompts, as
# opposed to the "_zhmt" machine-translated ones — confirm against promptsource.
# This array is not the one indexed by SLURM_ARRAY_TASK_ID at the end of the
# script (DATASETS_AND_CONFIGS_RU is).
DATASETS_AND_CONFIGS_ZHHT=(
|
565 |
+
Muennighoff/xstory_cloze,zh,zh,"Story Continuation and Options_zhht",validation
|
566 |
+
Muennighoff/xstory_cloze,zh,zh,"Answer Given options_zhht",validation
|
567 |
+
Muennighoff/xstory_cloze,zh,zh,"Novel Correct Ending_zhht",validation
|
568 |
+
Muennighoff/xstory_cloze,zh,zh,"Generate Ending_zhht",validation
|
569 |
+
Muennighoff/xstory_cloze,zh,zh,"Choose Story Ending_zhht",validation
|
570 |
+
Muennighoff/xwinograd,zh,zh,"underscore refer to_zhht",test
|
571 |
+
Muennighoff/xwinograd,zh,zh,"Replace_zhht",test
|
572 |
+
Muennighoff/xwinograd,zh,zh,"stand for_zhht",test
|
573 |
+
Muennighoff/xwinograd,zh,zh,"does underscore refer to_zhht",test
|
574 |
+
Muennighoff/xwinograd,zh,zh,"True or False_zhht",test
|
575 |
+
xcopa,zh,zh,"best_option_zhht",validation
|
576 |
+
xcopa,zh,zh,"C1 or C2? premise_zhht",validation
|
577 |
+
xcopa,zh,zh,"i_am_hesitating_zhht",validation
|
578 |
+
xcopa,zh,zh,"cause_effect_zhht",validation
|
579 |
+
xcopa,zh,zh,"plausible_alternatives_zhht",validation
|
580 |
+
)
|
581 |
+
|
582 |
+
DATASETS_AND_CONFIGS_XNLIHTMT=(
|
583 |
+
xnli,ar,ar,"guaranteed/possible/impossible_arht",validation
|
584 |
+
xnli,ar,ar,"MNLI crowdsource_arht",validation
|
585 |
+
xnli,ar,ar,"GPT-3 style_arht",validation
|
586 |
+
xnli,ar,ar,"justified in saying_arht",validation
|
587 |
+
xnli,ar,ar,"can we infer_arht",validation
|
588 |
+
xnli,ar,ar,"guaranteed/possible/impossible_armt",validation
|
589 |
+
xnli,ar,ar,"MNLI crowdsource_armt",validation
|
590 |
+
xnli,ar,ar,"GPT-3 style_armt",validation
|
591 |
+
xnli,ar,ar,"justified in saying_armt",validation
|
592 |
+
xnli,ar,ar,"can we infer_armt",validation
|
593 |
+
xnli,es,es,"guaranteed/possible/impossible_esht",validation
|
594 |
+
xnli,es,es,"MNLI crowdsource_esht",validation
|
595 |
+
xnli,es,es,"GPT-3 style_esht",validation
|
596 |
+
xnli,es,es,"justified in saying_esht",validation
|
597 |
+
xnli,es,es,"can we infer_esht",validation
|
598 |
+
xnli,es,es,"guaranteed/possible/impossible_esmt",validation
|
599 |
+
xnli,es,es,"MNLI crowdsource_esmt",validation
|
600 |
+
xnli,es,es,"GPT-3 style_esmt",validation
|
601 |
+
xnli,es,es,"justified in saying_esmt",validation
|
602 |
+
xnli,es,es,"can we infer_esmt",validation
|
603 |
+
xnli,fr,fr,"guaranteed/possible/impossible_frht",validation
|
604 |
+
xnli,fr,fr,"MNLI crowdsource_frht",validation
|
605 |
+
xnli,fr,fr,"GPT-3 style_frht",validation
|
606 |
+
xnli,fr,fr,"justified in saying_frht",validation
|
607 |
+
xnli,fr,fr,"can we infer_frht",validation
|
608 |
+
xnli,fr,fr,"guaranteed/possible/impossible_frmt",validation
|
609 |
+
xnli,fr,fr,"MNLI crowdsource_frmt",validation
|
610 |
+
xnli,fr,fr,"GPT-3 style_frmt",validation
|
611 |
+
xnli,fr,fr,"justified in saying_frmt",validation
|
612 |
+
xnli,fr,fr,"can we infer_frmt",validation
|
613 |
+
xnli,hi,hi,"guaranteed/possible/impossible_hiht",validation
|
614 |
+
xnli,hi,hi,"MNLI crowdsource_hiht",validation
|
615 |
+
xnli,hi,hi,"GPT-3 style_hiht",validation
|
616 |
+
xnli,hi,hi,"justified in saying_hiht",validation
|
617 |
+
xnli,hi,hi,"can we infer_hiht",validation
|
618 |
+
xnli,hi,hi,"guaranteed/possible/impossible_himt",validation
|
619 |
+
xnli,hi,hi,"MNLI crowdsource_himt",validation
|
620 |
+
xnli,hi,hi,"GPT-3 style_himt",validation
|
621 |
+
xnli,hi,hi,"justified in saying_himt",validation
|
622 |
+
xnli,hi,hi,"can we infer_himt",validation
|
623 |
+
xnli,ur,ur,"guaranteed/possible/impossible_urht",validation
|
624 |
+
xnli,ur,ur,"MNLI crowdsource_urht",validation
|
625 |
+
xnli,ur,ur,"GPT-3 style_urht",validation
|
626 |
+
xnli,ur,ur,"justified in saying_urht",validation
|
627 |
+
xnli,ur,ur,"can we infer_urht",validation
|
628 |
+
xnli,ur,ur,"guaranteed/possible/impossible_urmt",validation
|
629 |
+
xnli,ur,ur,"MNLI crowdsource_urmt",validation
|
630 |
+
xnli,ur,ur,"GPT-3 style_urmt",validation
|
631 |
+
xnli,ur,ur,"justified in saying_urmt",validation
|
632 |
+
xnli,ur,ur,"can we infer_urmt",validation
|
633 |
+
xnli,sw,sw,"guaranteed/possible/impossible_swht",validation
|
634 |
+
xnli,sw,sw,"MNLI crowdsource_swht",validation
|
635 |
+
xnli,sw,sw,"GPT-3 style_swht",validation
|
636 |
+
xnli,sw,sw,"justified in saying_swht",validation
|
637 |
+
xnli,sw,sw,"can we infer_swht",validation
|
638 |
+
xnli,sw,sw,"guaranteed/possible/impossible_swmt",validation
|
639 |
+
xnli,sw,sw,"MNLI crowdsource_swmt",validation
|
640 |
+
xnli,sw,sw,"GPT-3 style_swmt",validation
|
641 |
+
xnli,sw,sw,"justified in saying_swmt",validation
|
642 |
+
xnli,sw,sw,"can we infer_swmt",validation
|
643 |
+
xnli,vi,vi,"guaranteed/possible/impossible_viht",validation
|
644 |
+
xnli,vi,vi,"MNLI crowdsource_viht",validation
|
645 |
+
xnli,vi,vi,"GPT-3 style_viht",validation
|
646 |
+
xnli,vi,vi,"justified in saying_viht",validation
|
647 |
+
xnli,vi,vi,"can we infer_viht",validation
|
648 |
+
xnli,vi,vi,"guaranteed/possible/impossible_vimt",validation
|
649 |
+
xnli,vi,vi,"MNLI crowdsource_vimt",validation
|
650 |
+
xnli,vi,vi,"GPT-3 style_vimt",validation
|
651 |
+
xnli,vi,vi,"justified in saying_vimt",validation
|
652 |
+
xnli,vi,vi,"can we infer_vimt",validation
|
653 |
+
xnli,zh,zh,"guaranteed/possible/impossible_zhht",validation
|
654 |
+
xnli,zh,zh,"MNLI crowdsource_zhht",validation
|
655 |
+
xnli,zh,zh,"GPT-3 style_zhht",validation
|
656 |
+
xnli,zh,zh,"justified in saying_zhht",validation
|
657 |
+
xnli,zh,zh,"can we infer_zhht",validation
|
658 |
+
xnli,zh,zh,"guaranteed/possible/impossible_zhmt",validation
|
659 |
+
xnli,zh,zh,"MNLI crowdsource_zhmt",validation
|
660 |
+
xnli,zh,zh,"GPT-3 style_zhmt",validation
|
661 |
+
xnli,zh,zh,"justified in saying_zhmt",validation
|
662 |
+
xnli,zh,zh,"can we infer_zhmt",validation
|
663 |
+
)
|
664 |
+
|
665 |
+
DATASETS_AND_CONFIGS_MT_L2=(
|
666 |
+
Muennighoff/xstory_cloze,my,my,"Story Continuation and Options_mymt",validation
|
667 |
+
Muennighoff/xstory_cloze,my,my,"Answer Given options_mymt",validation
|
668 |
+
Muennighoff/xstory_cloze,my,my,"Novel Correct Ending_mymt",validation
|
669 |
+
Muennighoff/xstory_cloze,my,my,"Generate Ending_mymt",validation
|
670 |
+
Muennighoff/xstory_cloze,my,my,"Choose Story Ending_mymt",validation
|
671 |
+
Muennighoff/xstory_cloze,ru,ru,"Story Continuation and Options_rumt",validation
|
672 |
+
Muennighoff/xstory_cloze,ru,ru,"Answer Given options_rumt",validation
|
673 |
+
Muennighoff/xstory_cloze,ru,ru,"Novel Correct Ending_rumt",validation
|
674 |
+
Muennighoff/xstory_cloze,ru,ru,"Generate Ending_rumt",validation
|
675 |
+
Muennighoff/xstory_cloze,ru,ru,"Choose Story Ending_rumt",validation
|
676 |
+
Muennighoff/xstory_cloze,sw,sw,"Story Continuation and Options_swmt",validation
|
677 |
+
Muennighoff/xstory_cloze,sw,sw,"Answer Given options_swmt",validation
|
678 |
+
Muennighoff/xstory_cloze,sw,sw,"Novel Correct Ending_swmt",validation
|
679 |
+
Muennighoff/xstory_cloze,sw,sw,"Generate Ending_swmt",validation
|
680 |
+
Muennighoff/xstory_cloze,sw,sw,"Choose Story Ending_swmt",validation
|
681 |
+
Muennighoff/xstory_cloze,te,te,"Story Continuation and Options_temt",validation
|
682 |
+
Muennighoff/xstory_cloze,te,te,"Answer Given options_temt",validation
|
683 |
+
Muennighoff/xstory_cloze,te,te,"Novel Correct Ending_temt",validation
|
684 |
+
Muennighoff/xstory_cloze,te,te,"Generate Ending_temt",validation
|
685 |
+
Muennighoff/xstory_cloze,te,te,"Choose Story Ending_temt",validation
|
686 |
+
Muennighoff/xwinograd,jp,jp,"underscore refer to_jpmt",test
|
687 |
+
Muennighoff/xwinograd,jp,jp,"Replace_jpmt",test
|
688 |
+
Muennighoff/xwinograd,jp,jp,"stand for_jpmt",test
|
689 |
+
Muennighoff/xwinograd,jp,jp,"does underscore refer to_jpmt",test
|
690 |
+
Muennighoff/xwinograd,jp,jp,"True or False_jpmt",test
|
691 |
+
Muennighoff/xwinograd,ru,ru,"underscore refer to_rumt",test
|
692 |
+
Muennighoff/xwinograd,ru,ru,"Replace_rumt",test
|
693 |
+
Muennighoff/xwinograd,ru,ru,"stand for_rumt",test
|
694 |
+
Muennighoff/xwinograd,ru,ru,"does underscore refer to_rumt",test
|
695 |
+
Muennighoff/xwinograd,ru,ru,"True or False_rumt",test
|
696 |
+
xcopa,et,et,"best_option_etmt",validation
|
697 |
+
xcopa,et,et,"C1 or C2? premise_etmt",validation
|
698 |
+
xcopa,et,et,"i_am_hesitating_etmt",validation
|
699 |
+
xcopa,et,et,"cause_effect_etmt",validation
|
700 |
+
xcopa,et,et,"plausible_alternatives_etmt",validation
|
701 |
+
xcopa,ht,ht,"best_option_htmt",validation
|
702 |
+
xcopa,ht,ht,"C1 or C2? premise_htmt",validation
|
703 |
+
xcopa,ht,ht,"i_am_hesitating_htmt",validation
|
704 |
+
xcopa,ht,ht,"cause_effect_htmt",validation
|
705 |
+
xcopa,ht,ht,"plausible_alternatives_htmt",validation
|
706 |
+
xcopa,it,it,"best_option_itmt",validation
|
707 |
+
xcopa,it,it,"C1 or C2? premise_itmt",validation
|
708 |
+
xcopa,it,it,"i_am_hesitating_itmt",validation
|
709 |
+
xcopa,it,it,"cause_effect_itmt",validation
|
710 |
+
xcopa,it,it,"plausible_alternatives_itmt",validation
|
711 |
+
xcopa,qu,qu,"best_option_qumt",validation
|
712 |
+
xcopa,qu,qu,"C1 or C2? premise_qumt",validation
|
713 |
+
xcopa,qu,qu,"i_am_hesitating_qumt",validation
|
714 |
+
xcopa,qu,qu,"cause_effect_qumt",validation
|
715 |
+
xcopa,qu,qu,"plausible_alternatives_qumt",validation
|
716 |
+
xcopa,th,th,"best_option_thmt",validation
|
717 |
+
xcopa,th,th,"C1 or C2? premise_thmt",validation
|
718 |
+
xcopa,th,th,"i_am_hesitating_thmt",validation
|
719 |
+
xcopa,th,th,"cause_effect_thmt",validation
|
720 |
+
xcopa,th,th,"plausible_alternatives_thmt",validation
|
721 |
+
xcopa,tr,tr,"best_option_trmt",validation
|
722 |
+
xcopa,tr,tr,"C1 or C2? premise_trmt",validation
|
723 |
+
xcopa,tr,tr,"i_am_hesitating_trmt",validation
|
724 |
+
xcopa,tr,tr,"cause_effect_trmt",validation
|
725 |
+
xcopa,tr,tr,"plausible_alternatives_trmt",validation
|
726 |
+
xnli,bg,bg,"guaranteed/possible/impossible_bgmt",validation
|
727 |
+
xnli,bg,bg,"MNLI crowdsource_bgmt",validation
|
728 |
+
xnli,bg,bg,"GPT-3 style_bgmt",validation
|
729 |
+
xnli,bg,bg,"justified in saying_bgmt",validation
|
730 |
+
xnli,bg,bg,"can we infer_bgmt",validation
|
731 |
+
xnli,de,de,"guaranteed/possible/impossible_demt",validation
|
732 |
+
xnli,de,de,"MNLI crowdsource_demt",validation
|
733 |
+
xnli,de,de,"GPT-3 style_demt",validation
|
734 |
+
xnli,de,de,"justified in saying_demt",validation
|
735 |
+
xnli,de,de,"can we infer_demt",validation
|
736 |
+
xnli,el,el,"guaranteed/possible/impossible_elmt",validation
|
737 |
+
xnli,el,el,"MNLI crowdsource_elmt",validation
|
738 |
+
xnli,el,el,"GPT-3 style_elmt",validation
|
739 |
+
xnli,el,el,"justified in saying_elmt",validation
|
740 |
+
xnli,el,el,"can we infer_elmt",validation
|
741 |
+
xnli,ru,ru,"guaranteed/possible/impossible_rumt",validation
|
742 |
+
xnli,ru,ru,"MNLI crowdsource_rumt",validation
|
743 |
+
xnli,ru,ru,"GPT-3 style_rumt",validation
|
744 |
+
xnli,ru,ru,"justified in saying_rumt",validation
|
745 |
+
xnli,ru,ru,"can we infer_rumt",validation
|
746 |
+
xnli,th,th,"guaranteed/possible/impossible_thmt",validation
|
747 |
+
xnli,th,th,"MNLI crowdsource_thmt",validation
|
748 |
+
xnli,th,th,"GPT-3 style_thmt",validation
|
749 |
+
xnli,th,th,"justified in saying_thmt",validation
|
750 |
+
xnli,th,th,"can we infer_thmt",validation
|
751 |
+
xnli,tr,tr,"guaranteed/possible/impossible_trmt",validation
|
752 |
+
xnli,tr,tr,"MNLI crowdsource_trmt",validation
|
753 |
+
xnli,tr,tr,"GPT-3 style_trmt",validation
|
754 |
+
xnli,tr,tr,"justified in saying_trmt",validation
|
755 |
+
xnli,tr,tr,"can we infer_trmt",validation
|
756 |
+
)
|
757 |
+
|
758 |
+
# Russian (ru) evaluation entries with English ("en") prompt templates:
# five prompts each for xstory_cloze, xwinograd and xnli (15 entries total).
# Entry format (split on "," by the `read` further down):
#   dataset_name,dataset_config_name,template_config_name,template_name,split
# This is the array indexed by SLURM_ARRAY_TASK_ID below, so array task ids
# 0..14 each select one entry.
DATASETS_AND_CONFIGS_RU=(
|
759 |
+
Muennighoff/xstory_cloze,ru,en,"Story Continuation and Options",validation
|
760 |
+
Muennighoff/xstory_cloze,ru,en,"Answer Given options",validation
|
761 |
+
Muennighoff/xstory_cloze,ru,en,"Novel Correct Ending",validation
|
762 |
+
Muennighoff/xstory_cloze,ru,en,"Generate Ending",validation
|
763 |
+
Muennighoff/xstory_cloze,ru,en,"Choose Story Ending",validation
|
764 |
+
Muennighoff/xwinograd,ru,en,"underscore refer to",test
|
765 |
+
Muennighoff/xwinograd,ru,en,"Replace",test
|
766 |
+
Muennighoff/xwinograd,ru,en,"stand for",test
|
767 |
+
Muennighoff/xwinograd,ru,en,"does underscore refer to",test
|
768 |
+
Muennighoff/xwinograd,ru,en,"True or False",test
|
769 |
+
xnli,ru,en,"guaranteed/possible/impossible",validation
|
770 |
+
xnli,ru,en,"MNLI crowdsource",validation
|
771 |
+
xnli,ru,en,"GPT-3 style",validation
|
772 |
+
xnli,ru,en,"justified in saying",validation
|
773 |
+
xnli,ru,en,"can we infer",validation
|
774 |
+
)
|
775 |
+
|
776 |
+
# Pick the entry for this Slurm array task. DATASETS_AND_CONFIGS_RU holds 15
# entries, so SLURM_ARRAY_TASK_ID is expected to be in 0..14.
DATASET_AND_CONFIG=${DATASETS_AND_CONFIGS_RU[$SLURM_ARRAY_TASK_ID]}
# Log the selected entry. This used to echo $ARGUMENT, which is not assigned
# anywhere in the visible part of this script, so the log line came out empty.
echo "$DATASET_AND_CONFIG"
|
778 |
+
|
779 |
+
# Run T0 evaluation
|
780 |
+
# For PrefixLM add --prefixlm
|
781 |
+
# Split the selected entry into its five fields:
#   dataset_name,dataset_config_name,template_config_name,template_name,split
# The split is taken as the text after the LAST comma and the first three
# fields are cut from the left, so template_name keeps any commas of its own
# (e.g. "C1 or C2? premise, so/because…" in the xcopa entries earlier in this
# file). A plain five-way `IFS=',' read` would cut such a name in two and push
# the remainder into split.
# Reads:  DATASET_AND_CONFIG
# Writes: dataset_name dataset_config_name template_config_name template_name split
parse_dataset_and_config() {
  split=${DATASET_AND_CONFIG##*,}
  # -r keeps backslashes literal; the last variable receives the remainder.
  IFS=',' read -r dataset_name dataset_config_name template_config_name template_name \
    <<< "${DATASET_AND_CONFIG%,*}"
}
parse_dataset_and_config
|
782 |
+
# Evaluate the checkpoint on the selected dataset/prompt pair with the t-zero
# evaluation script. Every expansion is quoted so that values containing
# spaces (template names, paths) reach run_eval.py as single arguments.
# CHECKPOINT_PATH and OUTPUT_DIR are not assigned in this part of the script;
# they must already be set by the time this line runs.
python promptsource/t-zero/evaluation/run_eval.py \
  --dataset_name "$dataset_name" \
  --dataset_config_name "$dataset_config_name" \
  --template_config_name "$template_config_name" \
  --template_name "$template_name" \
  --split "$split" \
  --model_name_or_path "$CHECKPOINT_PATH" \
  --output_dir "$OUTPUT_DIR" \
  --per_device_eval_batch_size 4 \
  --max_length 2048 \
  --dtype float16
|
launch.sh
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Launch script using torch.distributed.run(). Used by slurm
|
4 |
+
# scripts, don't invoke directly.
|
5 |
+
|
6 |
+
# Samuel's fix for apparent error in SLURM initialization
|
7 |
+
# Only the task with local id 0 on a node clears /dev/shm and prints the GPU
# status; every other local task just sleeps for two seconds.
# SLURM_LOCALID is quoted so that an unset/empty value makes the test fail
# with "integer expression expected" (taking the sleep branch) rather than
# producing the malformed test `[ -eq 0 ]`. `[[ ... -eq 0 ]]` is deliberately
# not used: it would evaluate an empty value as 0 and run the rm branch.
if [ "$SLURM_LOCALID" -eq 0 ]; then
  rm -rf /dev/shm/*
  # rocm-smi output is informational; its failure must not abort the launch.
  rocm-smi || true
else
  sleep 2
fi
|
13 |
+
|
14 |
+
# Network interfaces NCCL/RCCL is told to use for its socket traffic.
# NOTE(review): hsn0-hsn3 are presumably the node's high-speed network
# interfaces — confirm for the target cluster.
export NCCL_SOCKET_IFNAME=hsn0,hsn1,hsn2,hsn3
|
15 |
+
# Use as many OpenMP threads as Slurm's cpus-per-task value for this job.
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
|
16 |
+
# NOTE(review): presumably enlarges the default completion-queue size of the
# libfabric CXI provider — confirm against the libfabric fi_cxi documentation.
export FI_CXI_DEFAULT_CQ_SIZE=131072
|
17 |
+
|
18 |
+
# debugging (noisy)
|
19 |
+
#export NCCL_DEBUG=INFO
|
20 |
+
#export RCCL_KERNEL_COLL_TRACE_ENABLE=1
|
21 |
+
#export NCCL_DEBUG_SUBSYS=INIT,COLL
|
22 |
+
|
23 |
+
# Start from a clean module environment, then load the Cray programming
# environment, the project-provided ROCm/RCCL modules and the Python venv.
module --quiet purge
|
24 |
+
# NOTE(review): cray-python is loaded again after craype-accel-amd-gfx90a
# below; one of the two loads may be redundant — confirm before removing.
module load cray-python
|
25 |
+
|
26 |
+
module load CrayEnv
|
27 |
+
module load PrgEnv-cray/8.3.3
|
28 |
+
module load craype-accel-amd-gfx90a
|
29 |
+
module load cray-python
|
30 |
+
|
31 |
+
# Add a project-specific module directory, then load the modules it provides.
module use /pfs/lustrep2/projappl/project_462000125/samantao-public/mymodules
|
32 |
+
module load suse-repo-deps/sam-default
|
33 |
+
module load rocm/sam-5.2.3.lua
|
34 |
+
module load rccl/sam-develop.lua
|
35 |
+
module load aws-ofi-rccl/sam-default.lua
|
36 |
+
|
37 |
+
# Activate the Python virtual environment used for the run.
source /scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/bin/activate
|
38 |
+
|
39 |
+
# The rendezvous host is the first hostname in the job's node list.
MASTER_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
|
40 |
+
# Fixed rendezvous port, passed to --master_port below.
MASTER_PORT=9999
|
41 |
+
|
42 |
+
# One-line banner per task: node name, task id / node count, rendezvous
# endpoint, GPU count and the result of torch.cuda.is_available().
echo "Launching on $SLURMD_NODENAME ($SLURM_PROCID/$SLURM_JOB_NUM_NODES)," \
|
43 |
+
"master $MASTER_NODE port $MASTER_PORT," \
|
44 |
+
"GPUs $SLURM_GPUS_ON_NODE," \
|
45 |
+
"CUDA: $(python -c 'import torch; print(torch.cuda.is_available())')"
|
46 |
+
|
47 |
+
# Start the torch.distributed.run launcher for this task and hand it the
# training script plus arguments received by launch.sh ("$@").
# SLURM_PROCID is used as the node rank; the sbatch script in this commit
# requests --ntasks-per-node=1, so there is one task (and one rank) per node.
# All expansions are quoted so that empty or unusual values cannot shift the
# argument list.
python -u -m torch.distributed.run \
  --nnodes "$SLURM_JOB_NUM_NODES" \
  --nproc_per_node "$SLURM_GPUS_ON_NODE" \
  --node_rank="$SLURM_PROCID" \
  --master_addr "$MASTER_NODE" \
  --master_port "$MASTER_PORT" \
  "$@"
|
sbatch_mtf_4b_ru.sh
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#SBATCH --nodes=8
|
3 |
+
#SBATCH --ntasks-per-node=1
|
4 |
+
#SBATCH --cpus-per-task=32
|
5 |
+
#SBATCH --mem=256G
|
6 |
+
#SBATCH -p pilot
|
7 |
+
#SBATCH -t 48:00:00
|
8 |
+
#SBATCH --gpus-per-node=mi250:8
|
9 |
+
#SBATCH --exclusive=user
|
10 |
+
#SBATCH --hint=nomultithread
|
11 |
+
#SBATCH --account=project_462000119
|
12 |
+
#SBATCH -o logs/%j.out
|
13 |
+
#SBATCH -e logs/%j.err
|
14 |
+
|
15 |
+
# if run without sbatch, invoke here
|
16 |
+
#if [ -z $SLURM_JOB_ID ]; then
|
17 |
+
# mkdir -p logs
|
18 |
+
# sbatch "$0"
|
19 |
+
# exit
|
20 |
+
#fi
|
21 |
+
|
22 |
+
VARIANT=7b1ru2
|
23 |
+
|
24 |
+
set -euo pipefail
|
25 |
+
|
26 |
+
# symlink logs/latest.out and logs/latest.err
|
27 |
+
# Link targets are relative to logs/, matching the "#SBATCH -o logs/%j.out"
# and "-e logs/%j.err" file names. -f replaces the links left by earlier jobs.
ln -f -s "${SLURM_JOB_ID}.out" logs/latest.out
ln -f -s "${SLURM_JOB_ID}.err" logs/latest.err
|
29 |
+
|
30 |
+
# Per-variant paths, all relative to the submission directory and derived
# from $VARIANT. KILL_SWITCH_PATH is passed to --kill-switch-path.
KILL_SWITCH_PATH=kill-switch-$VARIANT
|
31 |
+
# Used for both --save and --load below.
CHECKPOINT_PATH=checkpoints_$VARIANT
|
32 |
+
# Passed to --tensorboard-dir.
TENSORBOARD_PATH=tensorboard_$VARIANT
|
33 |
+
|
34 |
+
# Data
|
35 |
+
TOKENIZER_NAME_OR_PATH=bigscience/tokenizer
|
36 |
+
|
37 |
+
TRAIN_DATA_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3ru_train.txt
|
38 |
+
VALID_DATA_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3_validation_ru.txt
|
39 |
+
|
40 |
+
PP_SIZE=1
|
41 |
+
TP_SIZE=1
|
42 |
+
|
43 |
+
MICRO_BATCH_SIZE=2
|
44 |
+
GRADIENT_ACCUMULATION_STEPS=16
|
45 |
+
# Total number of GPU processes: GPUs per node times number of nodes.
WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES))
|
46 |
+
# Samples per optimizer step: micro batch x processes x accumulation steps.
# Multiplying by the full WORLD_SIZE goes together with PP_SIZE=1 and
# TP_SIZE=1 above (presumably every GPU is its own data-parallel replica).
# With 8 nodes of 8 GPUs, as requested in the #SBATCH header, this would be
# 2*64*16 = 2048 — assuming SLURM_GPUS_ON_NODE is 8.
GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS))
|
47 |
+
|
48 |
+
# Model parameters
|
49 |
+
# Transformer shape, passed to --num-layers, --hidden-size and
# --num-attention-heads in CMD below.
NLAYERS=30
|
50 |
+
NHIDDEN=4096
|
51 |
+
NHEADS=32
|
52 |
+
# Used for both --seq-length and --max-position-embeddings.
SEQ_LEN=2048
|
53 |
+
|
54 |
+
# Passed verbatim (underscores included) to --train-samples.
# NOTE(review): this relies on the option being parsed by something that
# accepts "_" digit separators (e.g. Python int()) — confirm.
TRAIN_SAMPLES=6_348_800
|
55 |
+
|
56 |
+
# Passed to --save-interval.
SAVE_INTERVAL=500
|
57 |
+
|
58 |
+
# Written into the DeepSpeed JSON ("zero_optimization.stage") and also passed
# as --zero-stage.
ZERO_STAGE=1
|
59 |
+
|
60 |
+
mkdir -p ds_configs
|
61 |
+
config_json="ds_configs/$SLURM_JOB_ID.json"
|
62 |
+
|
63 |
+
# Write the per-job DeepSpeed configuration. The heredoc delimiter is
# unquoted on purpose so that $MICRO_BATCH_SIZE, $GLOBAL_BATCH_SIZE and
# $ZERO_STAGE are expanded into the JSON. The redirect target is quoted so an
# unusual path cannot be word-split.
# NOTE(review): "loss_scale": 0 presumably selects DeepSpeed's dynamic loss
# scaling — confirm against the DeepSpeed configuration reference.
cat <<EOT > "$config_json"
{
  "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
  "train_batch_size": $GLOBAL_BATCH_SIZE,
  "gradient_clipping": 1.0,
  "zero_optimization": {
    "stage": $ZERO_STAGE
  },
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 500,
    "hysteresis": 2,
    "min_loss_scale": 1,
    "initial_scale_power": 12
  },
  "steps_per_print": 2000,
  "wall_clock_breakdown": false
}
EOT
|
83 |
+
|
84 |
+
|
85 |
+
# Training command line: the finetune_t0.py script followed by its flags, kept
# as one whitespace-separated string that is word-split when handed to srun.
# Values come from the variables defined above. --clip-grad 1.0 used to be
# listed twice (once after --abort-on-unmet-fused-kernel-constraints and once
# in the optimizer group); it is now passed only once, with the same value.
CMD=" \
    Megatron-DeepSpeed/finetune_t0.py \
    --tensor-model-parallel-size $TP_SIZE \
    --pipeline-model-parallel-size $PP_SIZE \
    --num-layers $NLAYERS \
    --hidden-size $NHIDDEN \
    --num-attention-heads $NHEADS \
    --seq-length $SEQ_LEN \
    --max-position-embeddings $SEQ_LEN \
    --micro-batch-size $MICRO_BATCH_SIZE \
    --global-batch-size $GLOBAL_BATCH_SIZE \
    --train-samples $TRAIN_SAMPLES \
    --tokenizer-type PretrainedFromHF \
    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
    --init-method-std 0.0048 \
    --embed-layernorm \
    --fp16 \
    --seed 42 \
    --position-embedding-type alibi \
    --abort-on-unmet-fused-kernel-constraints \
    --clip-grad 1.0 \
    --kill-switch-path $KILL_SWITCH_PATH \
    --checkpoint-activations \
    --pad-vocab-size-to 250880 \
    --optimizer adam \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --adam-eps 1e-8 \
    --lr 2e-5 \
    --lr-decay-style constant \
    --lr-warmup-samples 0 \
    --weight-decay 1e-4 \
    --no-load-optim \
    --reset-progress \
    --norm-target-loss \
    --log-interval 10 \
    --save-interval $SAVE_INTERVAL \
    --eval-interval 500 \
    --eval-iters 1 \
    --tensorboard-dir $TENSORBOARD_PATH \
    --tensorboard-queue-size 5 \
    --log-timers-to-tensorboard \
    --log-batch-size-to-tensorboard \
    --log-validation-ppl-to-tensorboard \
    --save $CHECKPOINT_PATH \
    --load $CHECKPOINT_PATH \
    --train-weighted-split-paths-path $TRAIN_DATA_PATH \
    --valid-weighted-split-paths-path $VALID_DATA_PATH \
    --dataloader-type single \
    --data-impl mmap \
    --deepspeed \
    --deepspeed_config $config_json \
    --zero-stage $ZERO_STAGE \
    "
|
140 |
+
|
141 |
+
# Print the assembled command for the job log (unquoted, so runs of
# whitespace in CMD collapse to single spaces).
echo $CMD
|
142 |
+
|
143 |
+
echo "START $SLURM_JOBID: $(date)"
|
144 |
+
|
145 |
+
# $CMD is intentionally unquoted: it must word-split into the script path and
# its flags, which launch.sh forwards via "$@". --label prefixes each output
# line with the task number.
srun --label launch.sh $CMD
|
146 |
+
|
147 |
+
# With `set -euo pipefail` in effect, this line is reached only if srun
# returned success.
echo "END $SLURM_JOBID: $(date)"
|
train_ru.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"train: 1 0:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3rumegds/xp3_ru"
|