bobox committed on
Commit
e5ea348
1 Parent(s): 4b6a227

Training in progress, step 1540, checkpoint

Browse files
checkpoint-1540/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-1540/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1540/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-1540/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2Model"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.42.4",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
checkpoint-1540/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.42.4",
5
+ "pytorch": "2.3.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-1540/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-1540/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64442df3d29d7f919cee599932884e9fc8eb220843409b94ad999c725502e89d
3
+ size 1130520122
checkpoint-1540/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da783cc8782ecbe6690c2fba419574d6df51cb50b1e509c8d197be8238332fa7
3
+ size 565251810
checkpoint-1540/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab90a68b39be619760ba6dbf6b191504b704ef8e4eb8a46b1e3e6998565c9b36
3
+ size 14244
checkpoint-1540/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521c244446e03ff9932f095ae3b572508f0e5b7772deddc404628fa014160cc4
3
+ size 1064
checkpoint-1540/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-1540/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-1540/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-1540/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1540/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-1540/trainer_state.json ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.2000779524490061,
5
+ "eval_steps": 1540,
6
+ "global_step": 1540,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020007795244900612,
13
+ "grad_norm": 20.274534225463867,
14
+ "learning_rate": 1.121454860359385e-06,
15
+ "loss": 10.0783,
16
+ "step": 154
17
+ },
18
+ {
19
+ "epoch": 0.040015590489801224,
20
+ "grad_norm": 37.92953109741211,
21
+ "learning_rate": 2.288374106949556e-06,
22
+ "loss": 7.9365,
23
+ "step": 308
24
+ },
25
+ {
26
+ "epoch": 0.06002338573470183,
27
+ "grad_norm": 10.249253273010254,
28
+ "learning_rate": 3.4552933535397265e-06,
29
+ "loss": 7.0986,
30
+ "step": 462
31
+ },
32
+ {
33
+ "epoch": 0.08003118097960245,
34
+ "grad_norm": 37.03239822387695,
35
+ "learning_rate": 4.622212600129898e-06,
36
+ "loss": 6.0384,
37
+ "step": 616
38
+ },
39
+ {
40
+ "epoch": 0.10003897622450306,
41
+ "grad_norm": 28.280763626098633,
42
+ "learning_rate": 5.7891318467200685e-06,
43
+ "loss": 5.2434,
44
+ "step": 770
45
+ },
46
+ {
47
+ "epoch": 0.12004677146940367,
48
+ "grad_norm": 26.781627655029297,
49
+ "learning_rate": 6.95605109331024e-06,
50
+ "loss": 4.4737,
51
+ "step": 924
52
+ },
53
+ {
54
+ "epoch": 0.14005456671430427,
55
+ "grad_norm": 40.773719787597656,
56
+ "learning_rate": 8.122970339900411e-06,
57
+ "loss": 3.953,
58
+ "step": 1078
59
+ },
60
+ {
61
+ "epoch": 0.1600623619592049,
62
+ "grad_norm": 4.9041428565979,
63
+ "learning_rate": 9.28988958649058e-06,
64
+ "loss": 3.7847,
65
+ "step": 1232
66
+ },
67
+ {
68
+ "epoch": 0.1800701572041055,
69
+ "grad_norm": 40.02103805541992,
70
+ "learning_rate": 1.0456808833080752e-05,
71
+ "loss": 3.3807,
72
+ "step": 1386
73
+ },
74
+ {
75
+ "epoch": 0.2000779524490061,
76
+ "grad_norm": 17.65666961669922,
77
+ "learning_rate": 1.1623728079670923e-05,
78
+ "loss": 3.3067,
79
+ "step": 1540
80
+ },
81
+ {
82
+ "epoch": 0.2000779524490061,
83
+ "eval_Vitaminc_cosine_accuracy": 0.55859375,
84
+ "eval_Vitaminc_cosine_accuracy_threshold": 0.7416476011276245,
85
+ "eval_Vitaminc_cosine_ap": 0.5386900578010883,
86
+ "eval_Vitaminc_cosine_f1": 0.6542553191489362,
87
+ "eval_Vitaminc_cosine_f1_threshold": 0.39137744903564453,
88
+ "eval_Vitaminc_cosine_precision": 0.48616600790513836,
89
+ "eval_Vitaminc_cosine_recall": 1.0,
90
+ "eval_Vitaminc_dot_accuracy": 0.55859375,
91
+ "eval_Vitaminc_dot_accuracy_threshold": 168.04440307617188,
92
+ "eval_Vitaminc_dot_ap": 0.5406067293020295,
93
+ "eval_Vitaminc_dot_f1": 0.6542553191489362,
94
+ "eval_Vitaminc_dot_f1_threshold": 82.83587646484375,
95
+ "eval_Vitaminc_dot_precision": 0.48616600790513836,
96
+ "eval_Vitaminc_dot_recall": 1.0,
97
+ "eval_Vitaminc_euclidean_accuracy": 0.57421875,
98
+ "eval_Vitaminc_euclidean_accuracy_threshold": 10.546405792236328,
99
+ "eval_Vitaminc_euclidean_ap": 0.5319997521875279,
100
+ "eval_Vitaminc_euclidean_f1": 0.6556473829201103,
101
+ "eval_Vitaminc_euclidean_f1_threshold": 14.913542747497559,
102
+ "eval_Vitaminc_euclidean_precision": 0.49583333333333335,
103
+ "eval_Vitaminc_euclidean_recall": 0.967479674796748,
104
+ "eval_Vitaminc_manhattan_accuracy": 0.5625,
105
+ "eval_Vitaminc_manhattan_accuracy_threshold": 209.30810546875,
106
+ "eval_Vitaminc_manhattan_ap": 0.5310137997739139,
107
+ "eval_Vitaminc_manhattan_f1": 0.6594005449591281,
108
+ "eval_Vitaminc_manhattan_f1_threshold": 300.77703857421875,
109
+ "eval_Vitaminc_manhattan_precision": 0.4959016393442623,
110
+ "eval_Vitaminc_manhattan_recall": 0.983739837398374,
111
+ "eval_Vitaminc_max_accuracy": 0.57421875,
112
+ "eval_Vitaminc_max_accuracy_threshold": 209.30810546875,
113
+ "eval_Vitaminc_max_ap": 0.5406067293020295,
114
+ "eval_Vitaminc_max_f1": 0.6594005449591281,
115
+ "eval_Vitaminc_max_f1_threshold": 300.77703857421875,
116
+ "eval_Vitaminc_max_precision": 0.4959016393442623,
117
+ "eval_Vitaminc_max_recall": 1.0,
118
+ "eval_mrpc_cosine_accuracy": 0.70703125,
119
+ "eval_mrpc_cosine_accuracy_threshold": 0.7692825198173523,
120
+ "eval_mrpc_cosine_ap": 0.7930850238332522,
121
+ "eval_mrpc_cosine_f1": 0.8009708737864077,
122
+ "eval_mrpc_cosine_f1_threshold": 0.6043864488601685,
123
+ "eval_mrpc_cosine_precision": 0.6762295081967213,
124
+ "eval_mrpc_cosine_recall": 0.9821428571428571,
125
+ "eval_mrpc_dot_accuracy": 0.68359375,
126
+ "eval_mrpc_dot_accuracy_threshold": 111.09579467773438,
127
+ "eval_mrpc_dot_ap": 0.685668349677386,
128
+ "eval_mrpc_dot_f1": 0.8028846153846153,
129
+ "eval_mrpc_dot_f1_threshold": 100.36712646484375,
130
+ "eval_mrpc_dot_precision": 0.6733870967741935,
131
+ "eval_mrpc_dot_recall": 0.9940476190476191,
132
+ "eval_mrpc_euclidean_accuracy": 0.6953125,
133
+ "eval_mrpc_euclidean_accuracy_threshold": 8.249982833862305,
134
+ "eval_mrpc_euclidean_ap": 0.8099812581176395,
135
+ "eval_mrpc_euclidean_f1": 0.7999999999999999,
136
+ "eval_mrpc_euclidean_f1_threshold": 10.622720718383789,
137
+ "eval_mrpc_euclidean_precision": 0.6960352422907489,
138
+ "eval_mrpc_euclidean_recall": 0.9404761904761905,
139
+ "eval_mrpc_manhattan_accuracy": 0.6953125,
140
+ "eval_mrpc_manhattan_accuracy_threshold": 166.01010131835938,
141
+ "eval_mrpc_manhattan_ap": 0.8185487109494757,
142
+ "eval_mrpc_manhattan_f1": 0.7970660146699267,
143
+ "eval_mrpc_manhattan_f1_threshold": 243.34291076660156,
144
+ "eval_mrpc_manhattan_precision": 0.6763485477178424,
145
+ "eval_mrpc_manhattan_recall": 0.9702380952380952,
146
+ "eval_mrpc_max_accuracy": 0.70703125,
147
+ "eval_mrpc_max_accuracy_threshold": 166.01010131835938,
148
+ "eval_mrpc_max_ap": 0.8185487109494757,
149
+ "eval_mrpc_max_f1": 0.8028846153846153,
150
+ "eval_mrpc_max_f1_threshold": 243.34291076660156,
151
+ "eval_mrpc_max_precision": 0.6960352422907489,
152
+ "eval_mrpc_max_recall": 0.9940476190476191,
153
+ "eval_negation_cosine_accuracy": 1.0,
154
+ "eval_negation_dot_accuracy": 0.0,
155
+ "eval_negation_euclidean_accuracy": 1.0,
156
+ "eval_negation_manhattan_accuracy": 1.0,
157
+ "eval_negation_max_accuracy": 1.0,
158
+ "eval_nli-pairs_loss": 2.87699031829834,
159
+ "eval_nli-pairs_runtime": 3.3618,
160
+ "eval_nli-pairs_samples_per_second": 190.374,
161
+ "eval_nli-pairs_steps_per_second": 4.759,
162
+ "eval_sequential_score": 0.5406067293020295,
163
+ "eval_sts-test_pearson_cosine": 0.731642468323062,
164
+ "eval_sts-test_pearson_dot": 0.7060174899825389,
165
+ "eval_sts-test_pearson_euclidean": 0.7213340791831213,
166
+ "eval_sts-test_pearson_manhattan": 0.7306876393922224,
167
+ "eval_sts-test_pearson_max": 0.731642468323062,
168
+ "eval_sts-test_spearman_cosine": 0.7388342851809555,
169
+ "eval_sts-test_spearman_dot": 0.7163801725525887,
170
+ "eval_sts-test_spearman_euclidean": 0.7248067929450137,
171
+ "eval_sts-test_spearman_manhattan": 0.7326322535482364,
172
+ "eval_sts-test_spearman_max": 0.7388342851809555,
173
+ "step": 1540
174
+ },
175
+ {
176
+ "epoch": 0.2000779524490061,
177
+ "eval_vitaminc-pairs_loss": 5.996066570281982,
178
+ "eval_vitaminc-pairs_runtime": 0.7123,
179
+ "eval_vitaminc-pairs_samples_per_second": 151.627,
180
+ "eval_vitaminc-pairs_steps_per_second": 4.212,
181
+ "step": 1540
182
+ },
183
+ {
184
+ "epoch": 0.2000779524490061,
185
+ "eval_negation-triplets_loss": 4.761821746826172,
186
+ "eval_negation-triplets_runtime": 0.1742,
187
+ "eval_negation-triplets_samples_per_second": 510.962,
188
+ "eval_negation-triplets_steps_per_second": 17.223,
189
+ "step": 1540
190
+ },
191
+ {
192
+ "epoch": 0.2000779524490061,
193
+ "eval_qnli-contrastive_loss": 3.127272605895996,
194
+ "eval_qnli-contrastive_runtime": 0.2192,
195
+ "eval_qnli-contrastive_samples_per_second": 583.919,
196
+ "eval_qnli-contrastive_steps_per_second": 18.247,
197
+ "step": 1540
198
+ },
199
+ {
200
+ "epoch": 0.2000779524490061,
201
+ "eval_scitail-pairs-qa_loss": 0.2922694981098175,
202
+ "eval_scitail-pairs-qa_runtime": 0.5702,
203
+ "eval_scitail-pairs-qa_samples_per_second": 224.477,
204
+ "eval_scitail-pairs-qa_steps_per_second": 7.015,
205
+ "step": 1540
206
+ },
207
+ {
208
+ "epoch": 0.2000779524490061,
209
+ "eval_scitail-pairs-pos_loss": 1.1873807907104492,
210
+ "eval_scitail-pairs-pos_runtime": 1.0164,
211
+ "eval_scitail-pairs-pos_samples_per_second": 125.931,
212
+ "eval_scitail-pairs-pos_steps_per_second": 3.935,
213
+ "step": 1540
214
+ },
215
+ {
216
+ "epoch": 0.2000779524490061,
217
+ "eval_xsum-pairs_loss": 1.7732343673706055,
218
+ "eval_xsum-pairs_runtime": 0.9302,
219
+ "eval_xsum-pairs_samples_per_second": 137.603,
220
+ "eval_xsum-pairs_steps_per_second": 4.3,
221
+ "step": 1540
222
+ },
223
+ {
224
+ "epoch": 0.2000779524490061,
225
+ "eval_compression-pairs_loss": 0.9279663562774658,
226
+ "eval_compression-pairs_runtime": 0.1883,
227
+ "eval_compression-pairs_samples_per_second": 679.719,
228
+ "eval_compression-pairs_steps_per_second": 21.241,
229
+ "step": 1540
230
+ },
231
+ {
232
+ "epoch": 0.2000779524490061,
233
+ "eval_sciq_pairs_loss": 0.772415816783905,
234
+ "eval_sciq_pairs_runtime": 4.2316,
235
+ "eval_sciq_pairs_samples_per_second": 30.249,
236
+ "eval_sciq_pairs_steps_per_second": 0.945,
237
+ "step": 1540
238
+ },
239
+ {
240
+ "epoch": 0.2000779524490061,
241
+ "eval_qasc_pairs_loss": 2.3403422832489014,
242
+ "eval_qasc_pairs_runtime": 0.7364,
243
+ "eval_qasc_pairs_samples_per_second": 173.826,
244
+ "eval_qasc_pairs_steps_per_second": 5.432,
245
+ "step": 1540
246
+ },
247
+ {
248
+ "epoch": 0.2000779524490061,
249
+ "eval_qasc_facts_sym_loss": 1.3449885845184326,
250
+ "eval_qasc_facts_sym_runtime": 0.1874,
251
+ "eval_qasc_facts_sym_samples_per_second": 683.091,
252
+ "eval_qasc_facts_sym_steps_per_second": 21.347,
253
+ "step": 1540
254
+ },
255
+ {
256
+ "epoch": 0.2000779524490061,
257
+ "eval_openbookqa_pairs_loss": 3.779522657394409,
258
+ "eval_openbookqa_pairs_runtime": 0.6356,
259
+ "eval_openbookqa_pairs_samples_per_second": 201.399,
260
+ "eval_openbookqa_pairs_steps_per_second": 6.294,
261
+ "step": 1540
262
+ },
263
+ {
264
+ "epoch": 0.2000779524490061,
265
+ "eval_msmarco_pairs_loss": 3.28946852684021,
266
+ "eval_msmarco_pairs_runtime": 1.6437,
267
+ "eval_msmarco_pairs_samples_per_second": 77.871,
268
+ "eval_msmarco_pairs_steps_per_second": 2.433,
269
+ "step": 1540
270
+ },
271
+ {
272
+ "epoch": 0.2000779524490061,
273
+ "eval_nq_pairs_loss": 3.1534435749053955,
274
+ "eval_nq_pairs_runtime": 3.0776,
275
+ "eval_nq_pairs_samples_per_second": 41.591,
276
+ "eval_nq_pairs_steps_per_second": 1.3,
277
+ "step": 1540
278
+ },
279
+ {
280
+ "epoch": 0.2000779524490061,
281
+ "eval_trivia_pairs_loss": 3.8939096927642822,
282
+ "eval_trivia_pairs_runtime": 5.1055,
283
+ "eval_trivia_pairs_samples_per_second": 25.071,
284
+ "eval_trivia_pairs_steps_per_second": 0.783,
285
+ "step": 1540
286
+ },
287
+ {
288
+ "epoch": 0.2000779524490061,
289
+ "eval_quora_pairs_loss": 1.3539156913757324,
290
+ "eval_quora_pairs_runtime": 17.3308,
291
+ "eval_quora_pairs_samples_per_second": 92.321,
292
+ "eval_quora_pairs_steps_per_second": 2.25,
293
+ "step": 1540
294
+ },
295
+ {
296
+ "epoch": 0.2000779524490061,
297
+ "eval_gooaq_pairs_loss": 3.378002405166626,
298
+ "eval_gooaq_pairs_runtime": 1.018,
299
+ "eval_gooaq_pairs_samples_per_second": 125.731,
300
+ "eval_gooaq_pairs_steps_per_second": 3.929,
301
+ "step": 1540
302
+ },
303
+ {
304
+ "epoch": 0.2000779524490061,
305
+ "eval_mrpc_pairs_loss": 0.49983811378479004,
306
+ "eval_mrpc_pairs_runtime": 0.1796,
307
+ "eval_mrpc_pairs_samples_per_second": 712.509,
308
+ "eval_mrpc_pairs_steps_per_second": 22.266,
309
+ "step": 1540
310
+ }
311
+ ],
312
+ "logging_steps": 154,
313
+ "max_steps": 30788,
314
+ "num_input_tokens_seen": 0,
315
+ "num_train_epochs": 4,
316
+ "save_steps": 1540,
317
+ "stateful_callbacks": {
318
+ "TrainerControl": {
319
+ "args": {
320
+ "should_epoch_stop": false,
321
+ "should_evaluate": false,
322
+ "should_log": false,
323
+ "should_save": true,
324
+ "should_training_stop": false
325
+ },
326
+ "attributes": {}
327
+ }
328
+ },
329
+ "total_flos": 0.0,
330
+ "train_batch_size": 42,
331
+ "trial_name": null,
332
+ "trial_params": null
333
+ }
checkpoint-1540/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:046f35193fb875fe3ff7d615dff47f0dba2b07fce875797be7a677efbfacd570
3
+ size 5624