bobox committed on
Commit
eff020b
1 Parent(s): f0c9976

Training in progress, step 107, checkpoint

checkpoint-107/README.md CHANGED
@@ -164,34 +164,34 @@ model-index:
  type: sts-test
  metrics:
  - type: pearson_cosine
- value: 0.45494716382349193
+ value: 0.8483316632682467
  name: Pearson Cosine
  - type: spearman_cosine
- value: 0.48921418507251446
+ value: 0.8903503892346
  name: Spearman Cosine
  - type: pearson_manhattan
- value: 0.4417008219313264
+ value: 0.8815226866327923
  name: Pearson Manhattan
  - type: spearman_manhattan
- value: 0.46068648052845956
+ value: 0.8865568876827619
  name: Spearman Manhattan
  - type: pearson_euclidean
- value: 0.4480861256491879
+ value: 0.8814283057813619
  name: Pearson Euclidean
  - type: spearman_euclidean
- value: 0.4610824798409968
+ value: 0.8851830636663006
  name: Spearman Euclidean
  - type: pearson_dot
- value: 0.44837123659858896
+ value: 0.8392403098680445
  name: Pearson Dot
  - type: spearman_dot
- value: 0.46707725062744593
+ value: 0.857844431199042
  name: Spearman Dot
  - type: pearson_max
- value: 0.45494716382349193
+ value: 0.8815226866327923
  name: Pearson Max
  - type: spearman_max
- value: 0.48921418507251446
+ value: 0.8903503892346
  name: Spearman Max
  - task:
  type: triplet
@@ -223,109 +223,109 @@ model-index:
  type: VitaminC
  metrics:
  - type: cosine_accuracy
- value: 0.5546875
+ value: 0.578125
  name: Cosine Accuracy
  - type: cosine_accuracy_threshold
- value: 0.9041332006454468
+ value: 0.7859437465667725
  name: Cosine Accuracy Threshold
  - type: cosine_f1
- value: 0.6542553191489362
+ value: 0.6595174262734584
  name: Cosine F1
  - type: cosine_f1_threshold
- value: 0.452939510345459
+ value: 0.3211573362350464
  name: Cosine F1 Threshold
  - type: cosine_precision
- value: 0.48616600790513836
+ value: 0.492
  name: Cosine Precision
  - type: cosine_recall
  value: 1.0
  name: Cosine Recall
  - type: cosine_ap
- value: 0.5292859731465609
+ value: 0.5557444337961499
  name: Cosine Ap
  - type: dot_accuracy
- value: 0.5546875
+ value: 0.578125
  name: Dot Accuracy
  - type: dot_accuracy_threshold
- value: 414.42559814453125
+ value: 315.9444580078125
  name: Dot Accuracy Threshold
  - type: dot_f1
- value: 0.6542553191489362
+ value: 0.6595174262734584
  name: Dot F1
  - type: dot_f1_threshold
- value: 212.6934814453125
+ value: 129.88558959960938
  name: Dot F1 Threshold
  - type: dot_precision
- value: 0.48616600790513836
+ value: 0.492
  name: Dot Precision
  - type: dot_recall
  value: 1.0
  name: Dot Recall
  - type: dot_ap
- value: 0.5222732504955002
+ value: 0.5539524528858992
  name: Dot Ap
  - type: manhattan_accuracy
- value: 0.55859375
+ value: 0.578125
  name: Manhattan Accuracy
  - type: manhattan_accuracy_threshold
- value: 173.8212127685547
+ value: 276.40142822265625
  name: Manhattan Accuracy Threshold
  - type: manhattan_f1
- value: 0.6542553191489362
+ value: 0.6576819407008085
  name: Manhattan F1
  - type: manhattan_f1_threshold
- value: 415.5366516113281
+ value: 469.7353515625
  name: Manhattan F1 Threshold
  - type: manhattan_precision
- value: 0.48616600790513836
+ value: 0.49193548387096775
  name: Manhattan Precision
  - type: manhattan_recall
- value: 1.0
+ value: 0.991869918699187
  name: Manhattan Recall
  - type: manhattan_ap
- value: 0.5305698453165033
+ value: 0.5429240708188645
  name: Manhattan Ap
  - type: euclidean_accuracy
- value: 0.5546875
+ value: 0.58203125
  name: Euclidean Accuracy
  - type: euclidean_accuracy_threshold
- value: 9.18377685546875
+ value: 13.113249778747559
  name: Euclidean Accuracy Threshold
  - type: euclidean_f1
- value: 0.6542553191489362
+ value: 0.6577540106951871
  name: Euclidean F1
  - type: euclidean_f1_threshold
- value: 22.683509826660156
+ value: 23.90462303161621
  name: Euclidean F1 Threshold
  - type: euclidean_precision
- value: 0.48616600790513836
+ value: 0.4900398406374502
  name: Euclidean Precision
  - type: euclidean_recall
  value: 1.0
  name: Euclidean Recall
  - type: euclidean_ap
- value: 0.5291787221346742
+ value: 0.5510190217865811
  name: Euclidean Ap
  - type: max_accuracy
- value: 0.55859375
+ value: 0.58203125
  name: Max Accuracy
  - type: max_accuracy_threshold
- value: 414.42559814453125
+ value: 315.9444580078125
  name: Max Accuracy Threshold
  - type: max_f1
- value: 0.6542553191489362
+ value: 0.6595174262734584
  name: Max F1
  - type: max_f1_threshold
- value: 415.5366516113281
+ value: 469.7353515625
  name: Max F1 Threshold
  - type: max_precision
- value: 0.48616600790513836
+ value: 0.492
  name: Max Precision
  - type: max_recall
  value: 1.0
  name: Max Recall
  - type: max_ap
- value: 0.5305698453165033
+ value: 0.5557444337961499
  name: Max Ap
  ---

@@ -388,7 +388,7 @@ Then you can load this model and run inference.
  from sentence_transformers import SentenceTransformer

  # Download from the 🤗 Hub
- model = SentenceTransformer("bobox/DeBERTa-small-ST-v1-toytest-checkpoints-tmp")
+ model = SentenceTransformer("bobox/DeBERTa-small-ST-v1-toytest")
  # Run inference
  sentences = [
  'who did ben assault in home and away',
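The card's snippet is cut off by the hunk context above. A minimal, self-contained sketch of the same inference flow, assuming the renamed repo id from the `+` line; every sentence after the first is an illustrative placeholder:

```python
# Minimal sketch of the card's inference example; the real sentence list is
# elided by the diff context.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("bobox/DeBERTa-small-ST-v1-toytest")
sentences = [
    "who did ben assault in home and away",
    # ...remaining sentences elided in the diff...
]
embeddings = model.encode(sentences)           # (n_sentences, embedding_dim)
scores = util.cos_sim(embeddings, embeddings)  # pairwise cosine similarities
print(scores)
```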
@@ -439,16 +439,16 @@ You can finetune this model on your own dataset.

  | Metric | Value |
  |:--------------------|:-----------|
- | pearson_cosine | 0.4549 |
- | **spearman_cosine** | **0.4892** |
- | pearson_manhattan | 0.4417 |
- | spearman_manhattan | 0.4607 |
- | pearson_euclidean | 0.4481 |
- | spearman_euclidean | 0.4611 |
- | pearson_dot | 0.4484 |
- | spearman_dot | 0.4671 |
- | pearson_max | 0.4549 |
- | spearman_max | 0.4892 |
+ | pearson_cosine | 0.8483 |
+ | **spearman_cosine** | **0.8904** |
+ | pearson_manhattan | 0.8815 |
+ | spearman_manhattan | 0.8866 |
+ | pearson_euclidean | 0.8814 |
+ | spearman_euclidean | 0.8852 |
+ | pearson_dot | 0.8392 |
+ | spearman_dot | 0.8578 |
+ | pearson_max | 0.8815 |
+ | spearman_max | 0.8904 |

  #### Triplet
  * Dataset: `NLI-v2`
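These rows are what the sentence-transformers semantic-similarity evaluator reports on `sts-test`: correlation between the model's similarity scores and gold labels. A hedged sketch of the equivalent computation, where `sts_sentences1`, `sts_sentences2`, and `gold_scores` are hypothetical placeholders for the evaluation data:

```python
# Sketch of how pearson_cosine / spearman_cosine are obtained: correlate
# per-pair cosine scores against gold similarity labels.
import numpy as np
from scipy.stats import pearsonr, spearmanr

emb1 = model.encode(sts_sentences1)  # hypothetical: first sentences of pairs
emb2 = model.encode(sts_sentences2)  # hypothetical: second sentences of pairs

emb1 /= np.linalg.norm(emb1, axis=1, keepdims=True)
emb2 /= np.linalg.norm(emb2, axis=1, keepdims=True)
cosine_scores = (emb1 * emb2).sum(axis=1)      # one cosine score per pair

pearson_cosine = pearsonr(cosine_scores, gold_scores)[0]    # 0.8483 above
spearman_cosine = spearmanr(cosine_scores, gold_scores)[0]  # 0.8904 above
```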
@@ -468,41 +468,41 @@ You can finetune this model on your own dataset.

  | Metric | Value |
  |:-----------------------------|:-----------|
- | cosine_accuracy | 0.5547 |
- | cosine_accuracy_threshold | 0.9041 |
- | cosine_f1 | 0.6543 |
- | cosine_f1_threshold | 0.4529 |
- | cosine_precision | 0.4862 |
+ | cosine_accuracy | 0.5781 |
+ | cosine_accuracy_threshold | 0.7859 |
+ | cosine_f1 | 0.6595 |
+ | cosine_f1_threshold | 0.3212 |
+ | cosine_precision | 0.492 |
  | cosine_recall | 1.0 |
- | cosine_ap | 0.5293 |
- | dot_accuracy | 0.5547 |
- | dot_accuracy_threshold | 414.4256 |
- | dot_f1 | 0.6543 |
- | dot_f1_threshold | 212.6935 |
- | dot_precision | 0.4862 |
+ | cosine_ap | 0.5557 |
+ | dot_accuracy | 0.5781 |
+ | dot_accuracy_threshold | 315.9445 |
+ | dot_f1 | 0.6595 |
+ | dot_f1_threshold | 129.8856 |
+ | dot_precision | 0.492 |
  | dot_recall | 1.0 |
- | dot_ap | 0.5223 |
- | manhattan_accuracy | 0.5586 |
- | manhattan_accuracy_threshold | 173.8212 |
- | manhattan_f1 | 0.6543 |
- | manhattan_f1_threshold | 415.5367 |
- | manhattan_precision | 0.4862 |
- | manhattan_recall | 1.0 |
- | manhattan_ap | 0.5306 |
- | euclidean_accuracy | 0.5547 |
- | euclidean_accuracy_threshold | 9.1838 |
- | euclidean_f1 | 0.6543 |
- | euclidean_f1_threshold | 22.6835 |
- | euclidean_precision | 0.4862 |
+ | dot_ap | 0.554 |
+ | manhattan_accuracy | 0.5781 |
+ | manhattan_accuracy_threshold | 276.4014 |
+ | manhattan_f1 | 0.6577 |
+ | manhattan_f1_threshold | 469.7354 |
+ | manhattan_precision | 0.4919 |
+ | manhattan_recall | 0.9919 |
+ | manhattan_ap | 0.5429 |
+ | euclidean_accuracy | 0.582 |
+ | euclidean_accuracy_threshold | 13.1132 |
+ | euclidean_f1 | 0.6578 |
+ | euclidean_f1_threshold | 23.9046 |
+ | euclidean_precision | 0.49 |
  | euclidean_recall | 1.0 |
- | euclidean_ap | 0.5292 |
- | max_accuracy | 0.5586 |
- | max_accuracy_threshold | 414.4256 |
- | max_f1 | 0.6543 |
- | max_f1_threshold | 415.5367 |
- | max_precision | 0.4862 |
+ | euclidean_ap | 0.551 |
+ | max_accuracy | 0.582 |
+ | max_accuracy_threshold | 315.9445 |
+ | max_f1 | 0.6595 |
+ | max_f1_threshold | 469.7354 |
+ | max_precision | 0.492 |
  | max_recall | 1.0 |
- | **max_ap** | **0.5306** |
+ | **max_ap** | **0.5557** |

  <!--
  ## Bias, Risks and Limitations
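VitaminC is scored here as pair classification: for each distance function the evaluator sweeps a decision threshold and reports the best accuracy and F1 together with the thresholds that achieved them, plus threshold-free average precision; the `max_*` rows take the best result across the four distance functions. Recall pinned at 1.0 with precision near the positive rate means the F1-optimal threshold effectively labels every pair positive. A hedged sketch of that sweep, where `scores` and `labels` are hypothetical arrays of cosine scores and 0/1 gold labels:

```python
# Sketch of the threshold sweep behind cosine_f1 / cosine_f1_threshold and
# the threshold-free cosine_ap.
import numpy as np
from sklearn.metrics import average_precision_score, f1_score

best = max((f1_score(labels, (scores >= t).astype(int)), t)
           for t in np.unique(scores))
cosine_f1, cosine_f1_threshold = best                # e.g. 0.6595 at 0.3212
cosine_ap = average_precision_score(labels, scores)  # e.g. 0.5557
```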
@@ -1151,14 +1151,14 @@ You can finetune this model on your own dataset.
  #### Non-Default Hyperparameters

  - `eval_strategy`: steps
- - `per_device_train_batch_size`: 160
+ - `per_device_train_batch_size`: 320
  - `per_device_eval_batch_size`: 64
- - `gradient_accumulation_steps`: 8
+ - `gradient_accumulation_steps`: 4
  - `learning_rate`: 4e-05
- - `weight_decay`: 0.0001
+ - `weight_decay`: 5e-05
  - `lr_scheduler_type`: cosine_with_min_lr
- - `lr_scheduler_kwargs`: {'num_cycles': 0.5, 'min_lr': 1.3333333333333335e-05}
- - `warmup_ratio`: 0.33
+ - `lr_scheduler_kwargs`: {'num_cycles': 0.5, 'min_lr': 1e-05}
+ - `warmup_ratio`: 0.15
  - `save_safetensors`: False
  - `fp16`: True
  - `push_to_hub`: True
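Note the effective train batch is unchanged by this edit: 160 × 8 = 320 × 4 = 1280 samples per optimizer step; what changes is the per-device memory/step tradeoff, plus gentler weight decay, a lower learning-rate floor, and a shorter warmup. A hedged sketch of the new settings as sentence-transformers v3 training arguments (`output_dir` is illustrative):

```python
# The new (right-hand) hyperparameters as training arguments; the effective
# batch size stays 320 * 4 = 1280.
from sentence_transformers.training_args import SentenceTransformerTrainingArguments

args = SentenceTransformerTrainingArguments(
    output_dir="DeBERTa-small-ST-v1-toytest",  # illustrative path
    eval_strategy="steps",
    per_device_train_batch_size=320,
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=4,
    learning_rate=4e-5,
    weight_decay=5e-5,
    lr_scheduler_type="cosine_with_min_lr",
    lr_scheduler_kwargs={"num_cycles": 0.5, "min_lr": 1e-5},
    warmup_ratio=0.15,
    save_safetensors=False,
    fp16=True,
    push_to_hub=True,
)
```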
@@ -1173,14 +1173,14 @@ You can finetune this model on your own dataset.
  - `do_predict`: False
  - `eval_strategy`: steps
  - `prediction_loss_only`: True
- - `per_device_train_batch_size`: 160
+ - `per_device_train_batch_size`: 320
  - `per_device_eval_batch_size`: 64
  - `per_gpu_train_batch_size`: None
  - `per_gpu_eval_batch_size`: None
- - `gradient_accumulation_steps`: 8
+ - `gradient_accumulation_steps`: 4
  - `eval_accumulation_steps`: None
  - `learning_rate`: 4e-05
- - `weight_decay`: 0.0001
+ - `weight_decay`: 5e-05
  - `adam_beta1`: 0.9
  - `adam_beta2`: 0.999
  - `adam_epsilon`: 1e-08
@@ -1188,8 +1188,8 @@ You can finetune this model on your own dataset.
  - `num_train_epochs`: 3
  - `max_steps`: -1
  - `lr_scheduler_type`: cosine_with_min_lr
- - `lr_scheduler_kwargs`: {'num_cycles': 0.5, 'min_lr': 1.3333333333333335e-05}
- - `warmup_ratio`: 0.33
+ - `lr_scheduler_kwargs`: {'num_cycles': 0.5, 'min_lr': 1e-05}
+ - `warmup_ratio`: 0.15
  - `warmup_steps`: 0
  - `log_level`: passive
  - `log_level_replica`: warning
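`cosine_with_min_lr` decays the learning rate along a cosine from the peak toward `min_lr` (rather than zero) after warmup; this edit lowers the floor from ~1.33e-05 to 1e-05 and shortens warmup from 33% to 15% of training. A rough sketch of the shape, as an approximation of the transformers scheduler rather than its exact code:

```python
import math

def lr_at(step, total_steps, base_lr=4e-5, min_lr=1e-5,
          warmup_ratio=0.15, num_cycles=0.5):
    warmup = int(total_steps * warmup_ratio)
    if step < warmup:                                    # linear warmup
        return base_lr * step / max(1, warmup)
    progress = (step - warmup) / max(1, total_steps - warmup)
    cosine = 0.5 * (1.0 + math.cos(2.0 * math.pi * num_cycles * progress))
    return min_lr + (base_lr - min_lr) * cosine          # floors at min_lr
```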
@@ -1282,6 +1282,8 @@ You can finetune this model on your own dataset.
  </details>

  ### Training Logs
+ <details><summary>Click to expand</summary>
+
  | Epoch | Step | Training Loss | vitaminc-pairs loss | trivia pairs loss | xsum-pairs loss | paws-pos loss | sciq pairs loss | msmarco pairs loss | openbookqa pairs loss | gooaq pairs loss | nq pairs loss | scitail-pairs-pos loss | qasc pairs loss | negation-triplets loss | NLI-v2_max_accuracy | VitaminC_max_ap | sts-test_spearman_cosine |
  |:------:|:----:|:-------------:|:-------------------:|:-----------------:|:---------------:|:-------------:|:---------------:|:------------------:|:---------------------:|:----------------:|:-------------:|:----------------------:|:---------------:|:----------------------:|:-------------------:|:---------------:|:------------------------:|
  | 0.0169 | 3 | 7.2372 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
@@ -1319,7 +1321,185 @@ You can finetune this model on your own dataset.
  | 0.5589 | 99 | 2.1857 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
  | 0.5759 | 102 | 1.8881 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
  | 0.5928 | 105 | 2.2699 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.6097 | 108 | 2.1425 | 2.7217 | 1.7080 | 1.2066 | 0.0800 | 0.0949 | 1.6446 | 1.5739 | 1.7924 | 2.3649 | 0.2329 | 0.8462 | 2.3389 | 1.0 | 0.5323 | 0.7806 |
+ | 0.6267 | 111 | 2.1276 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.6436 | 114 | 1.7531 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.6606 | 117 | 2.0179 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.6775 | 120 | 1.5305 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.6944 | 123 | 1.6925 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.7114 | 126 | 1.5248 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.7283 | 129 | 1.523 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.7452 | 132 | 1.5474 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.7622 | 135 | 1.7221 | 2.8521 | 1.4495 | 0.7707 | 0.0601 | 0.0751 | 1.1524 | 1.4015 | 1.3955 | 1.7769 | 0.2150 | 0.6356 | 2.0742 | 1.0 | 0.5327 | 0.8315 |
+ | 0.7791 | 138 | 1.5366 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.7960 | 141 | 1.3045 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.8130 | 144 | 1.1999 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.8299 | 147 | 1.3483 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.8469 | 150 | 1.2009 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.8638 | 153 | 1.4495 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.8807 | 156 | 1.2329 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.8977 | 159 | 1.1905 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.9146 | 162 | 1.277 | 2.7764 | 1.2929 | 0.5587 | 0.0525 | 0.0604 | 0.8656 | 1.1903 | 1.1581 | 1.1554 | 0.1988 | 0.4943 | 2.0055 | 1.0 | 0.5311 | 0.8548 |
+ | 0.9315 | 165 | 1.339 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.9485 | 168 | 1.1535 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.9654 | 171 | 1.1643 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.9824 | 174 | 1.2221 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.9993 | 177 | 1.0974 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.0162 | 180 | 1.0984 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.0332 | 183 | 1.0543 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.0501 | 186 | 1.0994 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.0670 | 189 | 1.0621 | 2.6755 | 1.2004 | 0.3837 | 0.0421 | 0.0556 | 0.6897 | 1.0837 | 1.0353 | 0.9604 | 0.1854 | 0.4047 | 1.9071 | 1.0 | 0.5420 | 0.8680 |
+ | 1.0840 | 192 | 0.8724 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.1009 | 195 | 0.9381 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.1179 | 198 | 0.9617 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.1348 | 201 | 1.0139 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.1517 | 204 | 1.1073 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.1687 | 207 | 0.8365 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.1856 | 210 | 1.1012 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.2025 | 213 | 1.0016 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.2195 | 216 | 1.0957 | 2.5466 | 1.1412 | 0.3591 | 0.0395 | 0.0517 | 0.5819 | 0.9366 | 0.9686 | 0.8172 | 0.1901 | 0.3075 | 1.9161 | 1.0 | 0.5385 | 0.8656 |
+ | 1.2364 | 219 | 1.1273 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.2534 | 222 | 1.2568 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.2703 | 225 | 0.873 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.2872 | 228 | 1.0003 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.3042 | 231 | 1.142 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.3211 | 234 | 0.807 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.3380 | 237 | 1.0231 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.3550 | 240 | 0.797 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.3719 | 243 | 0.8473 | 2.5140 | 1.1067 | 0.2802 | 0.0343 | 0.0467 | 0.5559 | 0.8562 | 0.8929 | 0.7435 | 0.1750 | 0.2355 | 1.8629 | 1.0 | 0.5508 | 0.8687 |
+ | 1.3888 | 246 | 0.9531 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.4058 | 249 | 0.9023 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.4227 | 252 | 0.8922 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.4397 | 255 | 0.9874 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.4566 | 258 | 0.8508 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.4735 | 261 | 0.7149 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.4905 | 264 | 0.894 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.5074 | 267 | 0.867 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.5243 | 270 | 0.7493 | 2.5574 | 1.0634 | 0.2217 | 0.0319 | 0.0435 | 0.5027 | 0.7999 | 0.8005 | 0.6530 | 0.1693 | 0.2443 | 1.8535 | 1.0 | 0.5499 | 0.8716 |
+ | 1.5413 | 273 | 0.7974 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.5582 | 276 | 0.797 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.5752 | 279 | 0.6749 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.5921 | 282 | 0.9325 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.6090 | 285 | 0.8418 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.6260 | 288 | 1.0135 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.6429 | 291 | 0.6961 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.6598 | 294 | 0.9361 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.6768 | 297 | 0.6747 | 2.4871 | 0.9762 | 0.2242 | 0.0291 | 0.0396 | 0.5025 | 0.7668 | 0.7546 | 0.6427 | 0.1596 | 0.1963 | 1.7349 | 1.0 | 0.5461 | 0.8787 |
+ | 1.6937 | 300 | 0.7786 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.7107 | 303 | 0.7171 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.7276 | 306 | 0.6627 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.7445 | 309 | 0.6711 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.7615 | 312 | 0.9076 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.7784 | 315 | 0.7414 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.7953 | 318 | 0.582 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.8123 | 321 | 0.6068 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.8292 | 324 | 0.6219 | 2.5197 | 1.0206 | 0.1630 | 0.0273 | 0.0383 | 0.4859 | 0.7109 | 0.7736 | 0.5533 | 0.1535 | 0.2044 | 1.7016 | 1.0 | 0.5532 | 0.8807 |
+ | 1.8462 | 327 | 0.5862 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.8631 | 330 | 0.678 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.8800 | 333 | 0.6272 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.8970 | 336 | 0.5048 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.9139 | 339 | 0.7653 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.9308 | 342 | 0.6613 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.9478 | 345 | 0.6122 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.9647 | 348 | 0.5939 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 1.9817 | 351 | 0.6923 | 2.4379 | 0.9582 | 0.1464 | 0.0264 | 0.0382 | 0.4348 | 0.7554 | 0.7220 | 0.5432 | 0.1481 | 0.1640 | 1.7345 | 1.0 | 0.5560 | 0.8837 |
+ | 1.9986 | 354 | 0.5712 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.0155 | 357 | 0.5969 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.0325 | 360 | 0.5881 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.0494 | 363 | 0.6005 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.0663 | 366 | 0.6066 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.0833 | 369 | 0.4921 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.1002 | 372 | 0.5354 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.1171 | 375 | 0.5602 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.1341 | 378 | 0.5686 | 2.3908 | 0.9614 | 0.1454 | 0.0271 | 0.0374 | 0.4246 | 0.7796 | 0.6965 | 0.5298 | 0.1401 | 0.1604 | 1.7678 | 1.0 | 0.5539 | 0.8804 |
+ | 2.1510 | 381 | 0.6496 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.1680 | 384 | 0.4713 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.1849 | 387 | 0.6345 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.2018 | 390 | 0.5994 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.2188 | 393 | 0.6763 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.2357 | 396 | 0.7254 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.2526 | 399 | 0.8032 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.2696 | 402 | 0.4914 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.2865 | 405 | 0.6307 | 2.4388 | 0.9862 | 0.1308 | 0.0262 | 0.0379 | 0.3928 | 0.7434 | 0.6976 | 0.4998 | 0.1192 | 0.1466 | 1.7093 | 1.0 | 0.5533 | 0.8859 |
+ | 2.3035 | 408 | 0.7493 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.3204 | 411 | 0.5139 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.3373 | 414 | 0.6364 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.3543 | 417 | 0.4763 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.3712 | 420 | 0.583 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.3881 | 423 | 0.5912 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.4051 | 426 | 0.5936 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.4220 | 429 | 0.5959 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.4390 | 432 | 0.676 | 2.4265 | 0.9634 | 0.1220 | 0.0260 | 0.0362 | 0.4292 | 0.7433 | 0.6771 | 0.4752 | 0.1282 | 0.1304 | 1.6943 | 1.0 | 0.5532 | 0.8878 |
+ | 2.4559 | 435 | 0.5622 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.4728 | 438 | 0.4633 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.4898 | 441 | 0.5955 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.5067 | 444 | 0.6271 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.5236 | 447 | 0.4988 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.5406 | 450 | 0.519 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.5575 | 453 | 0.5538 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.5745 | 456 | 0.4826 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.5914 | 459 | 0.6322 | 2.4541 | 0.9231 | 0.1224 | 0.0253 | 0.0345 | 0.4048 | 0.7595 | 0.6607 | 0.4713 | 0.1168 | 0.1323 | 1.7024 | 1.0 | 0.5557 | 0.8868 |
+ | 2.6083 | 462 | 0.6342 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.6253 | 465 | 0.7012 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.6422 | 468 | 0.4175 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.6591 | 471 | 0.7575 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.6761 | 474 | 0.4687 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.6930 | 477 | 0.5907 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.7100 | 480 | 0.4796 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.7269 | 483 | 0.4809 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.7438 | 486 | 0.4696 | 2.4899 | 0.9546 | 0.1169 | 0.0247 | 0.0343 | 0.4138 | 0.7444 | 0.6688 | 0.4838 | 0.1166 | 0.1279 | 1.6605 | 1.0 | 0.5527 | 0.8883 |
+ | 2.7608 | 489 | 0.6588 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.7777 | 492 | 0.5675 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.7946 | 495 | 0.4007 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.8116 | 498 | 0.4476 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.8285 | 501 | 0.433 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.8454 | 504 | 0.4154 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.8624 | 507 | 0.5416 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.8793 | 510 | 0.4546 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.8963 | 513 | 0.3326 | 2.4924 | 0.9493 | 0.1071 | 0.0248 | 0.0344 | 0.4033 | 0.7376 | 0.6558 | 0.4478 | 0.1148 | 0.1219 | 1.6918 | 1.0 | 0.5534 | 0.8907 |
+ | 2.9132 | 516 | 0.594 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.9301 | 519 | 0.4727 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.9471 | 522 | 0.4701 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.9640 | 525 | 0.4606 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.9809 | 528 | 0.5025 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 2.9979 | 531 | 0.4314 | 2.4532 | 0.9270 | 0.1131 | 0.0247 | 0.0344 | 0.3951 | 0.7123 | 0.6345 | 0.4383 | 0.1143 | 0.1159 | 1.7003 | 1.0 | 0.5539 | 0.8904 |
+ | 0.0169 | 3 | 0.6012 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.0337 | 6 | 0.7573 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.0506 | 9 | 0.9212 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.0674 | 12 | 0.6117 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.0843 | 15 | 0.8545 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.1011 | 18 | 0.6515 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.1180 | 21 | 0.7159 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.1348 | 24 | 0.7019 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.1517 | 27 | 0.4411 | 2.4659 | 0.9318 | 0.1117 | 0.0249 | 0.0345 | 0.3955 | 0.7092 | 0.6506 | 0.4205 | 0.1150 | 0.1110 | 1.7311 | 1.0 | 0.5512 | 0.8906 |
+ | 0.1685 | 30 | 0.5125 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.1854 | 33 | 0.6885 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.2022 | 36 | 0.6435 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.2191 | 39 | 0.753 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.2360 | 42 | 0.7427 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.2528 | 45 | 0.5083 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.2697 | 48 | 0.7454 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.2865 | 51 | 0.8356 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.3034 | 54 | 0.8864 | 2.4545 | 0.9158 | 0.1009 | 0.0252 | 0.0347 | 0.3809 | 0.7240 | 0.6208 | 0.4417 | 0.1117 | 0.1055 | 1.7278 | 1.0 | 0.5499 | 0.8877 |
+ | 0.3202 | 57 | 0.6015 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.3371 | 60 | 0.9482 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.3539 | 63 | 0.5404 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.3708 | 66 | 0.805 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.3876 | 69 | 0.7184 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.4045 | 72 | 0.8708 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.4213 | 75 | 0.8327 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.4382 | 78 | 0.5025 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.4551 | 81 | 0.6517 | 2.3539 | 0.9324 | 0.0842 | 0.0244 | 0.0348 | 0.3454 | 0.7161 | 0.6094 | 0.4443 | 0.1182 | 0.1060 | 1.6492 | 1.0 | 0.5557 | 0.8904 |
+ | 0.4719 | 84 | 0.5801 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.4888 | 87 | 0.791 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.5056 | 90 | 0.6042 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.5225 | 93 | 0.7559 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.5393 | 96 | 0.6258 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.5562 | 99 | 0.8853 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.5730 | 102 | 0.5947 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
+ | 0.5899 | 105 | 0.644 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |

+ </details>

  ### Framework Versions
  - Python: 3.10.13
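The YAML hunks at the top of this file edit the card's `model-index` block, which the Hub parses into structured eval results. A hedged sketch of reading those metrics back, with attribute names per my understanding of `huggingface_hub`:

```python
# Read back the model-index metrics this diff updates.
from huggingface_hub import ModelCard

card = ModelCard.load("bobox/DeBERTa-small-ST-v1-toytest")
for res in card.data.eval_results or []:
    print(res.dataset_name, res.metric_name, res.metric_value)
```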
 
checkpoint-107/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1d68669b06d21146c42d3ab8ac5fb5ecd13179c0bf7760c5eae496de3c2bedb9
+ oid sha256:c4b52b19c1b7ba5dc357d641807fc458a5edfc8d957fa781f3b74046c4c35266
  size 1130520122
checkpoint-107/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:509380132a556aa5c5abd5ae7e2966c9aeeff6d3c17f413118c7e375b07b4a0c
+ oid sha256:2573f8d44707a08d0f8ac75dc82a2db81d33d5f56b9ff43e4b8bd36b6360356e
  size 565251810
checkpoint-107/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dba89f3e04d18dac9ee9bb8984f08a24cbbbd385d8c90bc6b32d99e0123f4094
+ oid sha256:9ee4a49a4489e711ad7e57eb8006fdb27dea4dd2f04c074a42f884b3f1874718
  size 14244
checkpoint-107/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:eb571e03d7f101ced903b4b06060b388f532cdee9c838a84c6bbeeb165f467db
+ oid sha256:672e608d66e62675fe93e3516bd09994e5b9413ad5901cbe4ab81e1e4e26683a
  size 1064
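The four hunks above touch Git LFS pointer files, not the tensors themselves: each tracked binary is stored in the repo as three lines (spec version, `sha256` oid, byte size), so overwriting a checkpoint only swaps the oid while the size stays fixed. A sketch of verifying a downloaded blob against its pointer:

```python
# Verify a downloaded LFS blob against the oid/size recorded in its pointer.
import hashlib

def verify_lfs_blob(path, expected_oid, expected_size):
    digest, size = hashlib.sha256(), 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

print(verify_lfs_blob(
    "checkpoint-107/pytorch_model.bin",
    "2573f8d44707a08d0f8ac75dc82a2db81d33d5f56b9ff43e4b8bd36b6360356e",
    565251810,
))
```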
checkpoint-107/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6040931545518702,
  "eval_steps": 27,
  "global_step": 107,
  "is_hyper_param_search": false,
@@ -9,694 +9,694 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.016937191249117856,
- "grad_norm": 34.22002029418945,
- "learning_rate": 6.818181818181818e-07,
- "loss": 7.2372,
  "step": 3
  },
  {
- "epoch": 0.03387438249823571,
- "grad_norm": 21.76839828491211,
- "learning_rate": 1.3636363636363636e-06,
- "loss": 6.855,
  "step": 6
  },
  {
- "epoch": 0.05081157374735357,
- "grad_norm": 21.260774612426758,
- "learning_rate": 2.0454545454545457e-06,
- "loss": 7.4707,
  "step": 9
  },
  {
- "epoch": 0.06774876499647142,
- "grad_norm": 16.885921478271484,
- "learning_rate": 2.7272727272727272e-06,
- "loss": 7.0187,
  "step": 12
  },
  {
- "epoch": 0.08468595624558928,
- "grad_norm": 19.509899139404297,
- "learning_rate": 3.409090909090909e-06,
- "loss": 6.6756,
  "step": 15
  },
  {
- "epoch": 0.10162314749470713,
- "grad_norm": 7.9427289962768555,
- "learning_rate": 4.0909090909090915e-06,
- "loss": 6.0155,
  "step": 18
  },
  {
- "epoch": 0.11856033874382499,
- "grad_norm": 7.325345039367676,
- "learning_rate": 4.772727272727273e-06,
- "loss": 6.1644,
  "step": 21
  },
  {
- "epoch": 0.13549752999294284,
- "grad_norm": 7.544689655303955,
- "learning_rate": 5.4545454545454545e-06,
- "loss": 6.2158,
  "step": 24
  },
  {
- "epoch": 0.1524347212420607,
- "grad_norm": 5.141758918762207,
- "learning_rate": 6.136363636363637e-06,
- "loss": 6.1369,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
  "eval_NLI-v2_cosine_accuracy": 1.0,
- "eval_NLI-v2_dot_accuracy": 0.109375,
  "eval_NLI-v2_euclidean_accuracy": 1.0,
  "eval_NLI-v2_manhattan_accuracy": 1.0,
  "eval_NLI-v2_max_accuracy": 1.0,
- "eval_VitaminC_cosine_accuracy": 0.5546875,
- "eval_VitaminC_cosine_accuracy_threshold": 0.9544724822044373,
- "eval_VitaminC_cosine_ap": 0.5356492030729136,
- "eval_VitaminC_cosine_f1": 0.6542553191489362,
- "eval_VitaminC_cosine_f1_threshold": 0.7148199081420898,
- "eval_VitaminC_cosine_precision": 0.48616600790513836,
  "eval_VitaminC_cosine_recall": 1.0,
- "eval_VitaminC_dot_accuracy": 0.55078125,
- "eval_VitaminC_dot_accuracy_threshold": 414.4264831542969,
- "eval_VitaminC_dot_ap": 0.5108219546857565,
- "eval_VitaminC_dot_f1": 0.6507936507936508,
- "eval_VitaminC_dot_f1_threshold": 271.6522521972656,
- "eval_VitaminC_dot_precision": 0.4823529411764706,
  "eval_VitaminC_dot_recall": 1.0,
- "eval_VitaminC_euclidean_accuracy": 0.55078125,
- "eval_VitaminC_euclidean_accuracy_threshold": 6.519885063171387,
- "eval_VitaminC_euclidean_ap": 0.5226419655984281,
- "eval_VitaminC_euclidean_f1": 0.6505376344086021,
- "eval_VitaminC_euclidean_f1_threshold": 15.194067001342773,
- "eval_VitaminC_euclidean_precision": 0.4859437751004016,
- "eval_VitaminC_euclidean_recall": 0.983739837398374,
- "eval_VitaminC_manhattan_accuracy": 0.546875,
- "eval_VitaminC_manhattan_accuracy_threshold": 149.20114135742188,
- "eval_VitaminC_manhattan_ap": 0.5237451656134715,
- "eval_VitaminC_manhattan_f1": 0.6542553191489362,
- "eval_VitaminC_manhattan_f1_threshold": 259.007080078125,
- "eval_VitaminC_manhattan_precision": 0.48616600790513836,
  "eval_VitaminC_manhattan_recall": 1.0,
- "eval_VitaminC_max_accuracy": 0.5546875,
- "eval_VitaminC_max_accuracy_threshold": 414.4264831542969,
- "eval_VitaminC_max_ap": 0.5356492030729136,
- "eval_VitaminC_max_f1": 0.6542553191489362,
- "eval_VitaminC_max_f1_threshold": 271.6522521972656,
- "eval_VitaminC_max_precision": 0.48616600790513836,
  "eval_VitaminC_max_recall": 1.0,
- "eval_sequential_score": 0.5356492030729136,
- "eval_sts-test_pearson_cosine": 0.056062031998983373,
- "eval_sts-test_pearson_dot": 0.2979259445723872,
- "eval_sts-test_pearson_euclidean": 0.0498319208592713,
- "eval_sts-test_pearson_manhattan": 0.07381429239121526,
- "eval_sts-test_pearson_max": 0.2979259445723872,
- "eval_sts-test_spearman_cosine": 0.1066788491614481,
- "eval_sts-test_spearman_dot": 0.315952670306405,
- "eval_sts-test_spearman_euclidean": 0.07303394554435191,
- "eval_sts-test_spearman_manhattan": 0.09039525717692232,
- "eval_sts-test_spearman_max": 0.315952670306405,
- "eval_vitaminc-pairs_loss": 2.698580741882324,
- "eval_vitaminc-pairs_runtime": 1.4747,
- "eval_vitaminc-pairs_samples_per_second": 73.236,
- "eval_vitaminc-pairs_steps_per_second": 1.356,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_negation-triplets_loss": 5.142906665802002,
- "eval_negation-triplets_runtime": 0.2993,
- "eval_negation-triplets_samples_per_second": 213.865,
- "eval_negation-triplets_steps_per_second": 3.342,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_scitail-pairs-pos_loss": 1.9216958284378052,
- "eval_scitail-pairs-pos_runtime": 0.3834,
- "eval_scitail-pairs-pos_samples_per_second": 140.842,
- "eval_scitail-pairs-pos_steps_per_second": 2.608,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_xsum-pairs_loss": 6.073049545288086,
- "eval_xsum-pairs_runtime": 3.1587,
- "eval_xsum-pairs_samples_per_second": 40.523,
- "eval_xsum-pairs_steps_per_second": 0.633,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_sciq_pairs_loss": 0.3449864387512207,
- "eval_sciq_pairs_runtime": 3.3747,
- "eval_sciq_pairs_samples_per_second": 37.93,
- "eval_sciq_pairs_steps_per_second": 0.593,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_qasc_pairs_loss": 3.2267842292785645,
- "eval_qasc_pairs_runtime": 0.6576,
- "eval_qasc_pairs_samples_per_second": 194.646,
- "eval_qasc_pairs_steps_per_second": 3.041,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_openbookqa_pairs_loss": 4.405983924865723,
- "eval_openbookqa_pairs_runtime": 0.6107,
- "eval_openbookqa_pairs_samples_per_second": 209.594,
- "eval_openbookqa_pairs_steps_per_second": 3.275,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_msmarco_pairs_loss": 6.937691688537598,
- "eval_msmarco_pairs_runtime": 1.3091,
- "eval_msmarco_pairs_samples_per_second": 97.779,
- "eval_msmarco_pairs_steps_per_second": 1.528,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_nq_pairs_loss": 6.794108867645264,
- "eval_nq_pairs_runtime": 2.3968,
- "eval_nq_pairs_samples_per_second": 53.404,
- "eval_nq_pairs_steps_per_second": 0.834,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_trivia_pairs_loss": 6.3355631828308105,
- "eval_trivia_pairs_runtime": 4.4974,
- "eval_trivia_pairs_samples_per_second": 28.461,
- "eval_trivia_pairs_steps_per_second": 0.445,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_gooaq_pairs_loss": 6.405998706817627,
- "eval_gooaq_pairs_runtime": 0.8745,
- "eval_gooaq_pairs_samples_per_second": 146.37,
- "eval_gooaq_pairs_steps_per_second": 2.287,
  "step": 27
  },
  {
- "epoch": 0.1524347212420607,
- "eval_paws-pos_loss": 2.2308223247528076,
- "eval_paws-pos_runtime": 0.6998,
- "eval_paws-pos_samples_per_second": 182.908,
- "eval_paws-pos_steps_per_second": 2.858,
  "step": 27
  },
  {
- "epoch": 0.16937191249117856,
- "grad_norm": 5.885251522064209,
- "learning_rate": 6.818181818181818e-06,
- "loss": 5.7653,
  "step": 30
  },
  {
- "epoch": 0.1863091037402964,
- "grad_norm": 7.357480049133301,
- "learning_rate": 7.500000000000001e-06,
- "loss": 6.1259,
  "step": 33
  },
  {
- "epoch": 0.20324629498941427,
- "grad_norm": 7.321795463562012,
- "learning_rate": 8.181818181818183e-06,
- "loss": 5.7539,
  "step": 36
  },
  {
- "epoch": 0.22018348623853212,
- "grad_norm": 4.239792346954346,
- "learning_rate": 8.863636363636365e-06,
- "loss": 6.0131,
  "step": 39
  },
  {
- "epoch": 0.23712067748764998,
- "grad_norm": 3.9554407596588135,
- "learning_rate": 9.545454545454547e-06,
- "loss": 6.0074,
  "step": 42
  },
  {
- "epoch": 0.25405786873676783,
- "grad_norm": 4.406026840209961,
- "learning_rate": 1.0227272727272729e-05,
- "loss": 5.7125,
  "step": 45
  },
  {
- "epoch": 0.2709950599858857,
- "grad_norm": 7.235893249511719,
- "learning_rate": 1.0909090909090909e-05,
- "loss": 5.5634,
  "step": 48
  },
  {
- "epoch": 0.28793225123500354,
- "grad_norm": 5.330288410186768,
- "learning_rate": 1.1590909090909093e-05,
- "loss": 5.2924,
  "step": 51
  },
  {
- "epoch": 0.3048694424841214,
- "grad_norm": 7.216403961181641,
- "learning_rate": 1.2272727272727274e-05,
- "loss": 5.2286,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
  "eval_NLI-v2_cosine_accuracy": 1.0,
- "eval_NLI-v2_dot_accuracy": 0.046875,
  "eval_NLI-v2_euclidean_accuracy": 1.0,
  "eval_NLI-v2_manhattan_accuracy": 1.0,
  "eval_NLI-v2_max_accuracy": 1.0,
- "eval_VitaminC_cosine_accuracy": 0.54296875,
- "eval_VitaminC_cosine_accuracy_threshold": 0.9328227043151855,
- "eval_VitaminC_cosine_ap": 0.5212059026196154,
- "eval_VitaminC_cosine_f1": 0.6576819407008085,
- "eval_VitaminC_cosine_f1_threshold": 0.7373804450035095,
- "eval_VitaminC_cosine_precision": 0.49193548387096775,
- "eval_VitaminC_cosine_recall": 0.991869918699187,
- "eval_VitaminC_dot_accuracy": 0.55078125,
- "eval_VitaminC_dot_accuracy_threshold": 418.2774658203125,
- "eval_VitaminC_dot_ap": 0.5160594099493883,
- "eval_VitaminC_dot_f1": 0.6521739130434782,
- "eval_VitaminC_dot_f1_threshold": 291.5081481933594,
- "eval_VitaminC_dot_precision": 0.4897959183673469,
- "eval_VitaminC_dot_recall": 0.975609756097561,
- "eval_VitaminC_euclidean_accuracy": 0.5390625,
- "eval_VitaminC_euclidean_accuracy_threshold": 8.120429039001465,
- "eval_VitaminC_euclidean_ap": 0.5224837623095228,
- "eval_VitaminC_euclidean_f1": 0.6576819407008085,
- "eval_VitaminC_euclidean_f1_threshold": 14.879999160766602,
- "eval_VitaminC_euclidean_precision": 0.49193548387096775,
- "eval_VitaminC_euclidean_recall": 0.991869918699187,
- "eval_VitaminC_manhattan_accuracy": 0.53515625,
- "eval_VitaminC_manhattan_accuracy_threshold": 137.40658569335938,
- "eval_VitaminC_manhattan_ap": 0.5186382518671783,
- "eval_VitaminC_manhattan_f1": 0.6576086956521738,
- "eval_VitaminC_manhattan_f1_threshold": 263.32452392578125,
- "eval_VitaminC_manhattan_precision": 0.49387755102040815,
- "eval_VitaminC_manhattan_recall": 0.983739837398374,
- "eval_VitaminC_max_accuracy": 0.55078125,
- "eval_VitaminC_max_accuracy_threshold": 418.2774658203125,
- "eval_VitaminC_max_ap": 0.5224837623095228,
- "eval_VitaminC_max_f1": 0.6576819407008085,
- "eval_VitaminC_max_f1_threshold": 291.5081481933594,
- "eval_VitaminC_max_precision": 0.49387755102040815,
- "eval_VitaminC_max_recall": 0.991869918699187,
- "eval_sequential_score": 0.5224837623095228,
- "eval_sts-test_pearson_cosine": 0.14377091128453176,
- "eval_sts-test_pearson_dot": 0.24728387094758872,
- "eval_sts-test_pearson_euclidean": 0.14604155960515372,
- "eval_sts-test_pearson_manhattan": 0.1446467532231986,
- "eval_sts-test_pearson_max": 0.24728387094758872,
- "eval_sts-test_spearman_cosine": 0.1968510434344728,
- "eval_sts-test_spearman_dot": 0.29467218283745694,
- "eval_sts-test_spearman_euclidean": 0.17218164683969664,
- "eval_sts-test_spearman_manhattan": 0.17741843340856742,
- "eval_sts-test_spearman_max": 0.29467218283745694,
- "eval_vitaminc-pairs_loss": 2.664700746536255,
- "eval_vitaminc-pairs_runtime": 1.4487,
- "eval_vitaminc-pairs_samples_per_second": 74.551,
- "eval_vitaminc-pairs_steps_per_second": 1.381,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_negation-triplets_loss": 4.6218037605285645,
- "eval_negation-triplets_runtime": 0.2971,
- "eval_negation-triplets_samples_per_second": 215.438,
- "eval_negation-triplets_steps_per_second": 3.366,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_scitail-pairs-pos_loss": 1.2413936853408813,
- "eval_scitail-pairs-pos_runtime": 0.372,
- "eval_scitail-pairs-pos_samples_per_second": 145.175,
- "eval_scitail-pairs-pos_steps_per_second": 2.688,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_xsum-pairs_loss": 5.249766826629639,
- "eval_xsum-pairs_runtime": 3.1506,
- "eval_xsum-pairs_samples_per_second": 40.627,
- "eval_xsum-pairs_steps_per_second": 0.635,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_sciq_pairs_loss": 0.2961578667163849,
- "eval_sciq_pairs_runtime": 3.2909,
- "eval_sciq_pairs_samples_per_second": 38.895,
- "eval_sciq_pairs_steps_per_second": 0.608,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_qasc_pairs_loss": 2.530872344970703,
- "eval_qasc_pairs_runtime": 0.6255,
- "eval_qasc_pairs_samples_per_second": 204.63,
- "eval_qasc_pairs_steps_per_second": 3.197,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_openbookqa_pairs_loss": 3.8855104446411133,
- "eval_openbookqa_pairs_runtime": 0.5742,
- "eval_openbookqa_pairs_samples_per_second": 222.914,
- "eval_openbookqa_pairs_steps_per_second": 3.483,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_msmarco_pairs_loss": 5.246406555175781,
- "eval_msmarco_pairs_runtime": 1.2872,
- "eval_msmarco_pairs_samples_per_second": 99.442,
- "eval_msmarco_pairs_steps_per_second": 1.554,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_nq_pairs_loss": 5.332630157470703,
- "eval_nq_pairs_runtime": 2.3739,
- "eval_nq_pairs_samples_per_second": 53.92,
- "eval_nq_pairs_steps_per_second": 0.843,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_trivia_pairs_loss": 5.647429943084717,
- "eval_trivia_pairs_runtime": 4.4729,
- "eval_trivia_pairs_samples_per_second": 28.617,
- "eval_trivia_pairs_steps_per_second": 0.447,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_gooaq_pairs_loss": 5.225871562957764,
- "eval_gooaq_pairs_runtime": 0.8715,
- "eval_gooaq_pairs_samples_per_second": 146.868,
- "eval_gooaq_pairs_steps_per_second": 2.295,
  "step": 54
  },
  {
- "epoch": 0.3048694424841214,
- "eval_paws-pos_loss": 0.8335962891578674,
- "eval_paws-pos_runtime": 0.6844,
- "eval_paws-pos_samples_per_second": 187.036,
- "eval_paws-pos_steps_per_second": 2.922,
  "step": 54
  },
  {
- "epoch": 0.32180663373323926,
- "grad_norm": 6.847682952880859,
- "learning_rate": 1.2954545454545455e-05,
- "loss": 4.4811,
  "step": 57
  },
  {
- "epoch": 0.3387438249823571,
- "grad_norm": 8.383002281188965,
- "learning_rate": 1.3636363636363637e-05,
- "loss": 4.4239,
  "step": 60
  },
  {
- "epoch": 0.35568101623147497,
- "grad_norm": 7.014843463897705,
- "learning_rate": 1.431818181818182e-05,
- "loss": 4.0273,
  "step": 63
  },
  {
- "epoch": 0.3726182074805928,
- "grad_norm": 5.9739885330200195,
- "learning_rate": 1.5000000000000002e-05,
- "loss": 3.4508,
  "step": 66
  },
  {
- "epoch": 0.3895553987297107,
- "grad_norm": 11.202752113342285,
- "learning_rate": 1.5681818181818182e-05,
- "loss": 3.9702,
  "step": 69
  },
  {
- "epoch": 0.40649258997882853,
- "grad_norm": 7.064818859100342,
- "learning_rate": 1.6363636363636366e-05,
- "loss": 3.5295,
  "step": 72
  },
  {
- "epoch": 0.4234297812279464,
- "grad_norm": 5.912719249725342,
- "learning_rate": 1.7045454545454546e-05,
- "loss": 3.6395,
  "step": 75
  },
  {
- "epoch": 0.44036697247706424,
- "grad_norm": 5.033207893371582,
- "learning_rate": 1.772727272727273e-05,
- "loss": 3.2398,
  "step": 78
  },
  {
- "epoch": 0.4573041637261821,
- "grad_norm": 5.218384265899658,
- "learning_rate": 1.840909090909091e-05,
- "loss": 3.116,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
  "eval_NLI-v2_cosine_accuracy": 1.0,
  "eval_NLI-v2_dot_accuracy": 0.0,
  "eval_NLI-v2_euclidean_accuracy": 1.0,
  "eval_NLI-v2_manhattan_accuracy": 1.0,
  "eval_NLI-v2_max_accuracy": 1.0,
- "eval_VitaminC_cosine_accuracy": 0.5546875,
- "eval_VitaminC_cosine_accuracy_threshold": 0.9041332006454468,
- "eval_VitaminC_cosine_ap": 0.5292859731465609,
- "eval_VitaminC_cosine_f1": 0.6542553191489362,
- "eval_VitaminC_cosine_f1_threshold": 0.452939510345459,
- "eval_VitaminC_cosine_precision": 0.48616600790513836,
  "eval_VitaminC_cosine_recall": 1.0,
- "eval_VitaminC_dot_accuracy": 0.5546875,
- "eval_VitaminC_dot_accuracy_threshold": 414.42559814453125,
- "eval_VitaminC_dot_ap": 0.5222732504955002,
- "eval_VitaminC_dot_f1": 0.6542553191489362,
- "eval_VitaminC_dot_f1_threshold": 212.6934814453125,
- "eval_VitaminC_dot_precision": 0.48616600790513836,
  "eval_VitaminC_dot_recall": 1.0,
- "eval_VitaminC_euclidean_accuracy": 0.5546875,
- "eval_VitaminC_euclidean_accuracy_threshold": 9.18377685546875,
- "eval_VitaminC_euclidean_ap": 0.5291787221346742,
- "eval_VitaminC_euclidean_f1": 0.6542553191489362,
- "eval_VitaminC_euclidean_f1_threshold": 22.683509826660156,
- "eval_VitaminC_euclidean_precision": 0.48616600790513836,
  "eval_VitaminC_euclidean_recall": 1.0,
- "eval_VitaminC_manhattan_accuracy": 0.55859375,
- "eval_VitaminC_manhattan_accuracy_threshold": 173.8212127685547,
- "eval_VitaminC_manhattan_ap": 0.5305698453165033,
- "eval_VitaminC_manhattan_f1": 0.6542553191489362,
- "eval_VitaminC_manhattan_f1_threshold": 415.5366516113281,
- "eval_VitaminC_manhattan_precision": 0.48616600790513836,
- "eval_VitaminC_manhattan_recall": 1.0,
- "eval_VitaminC_max_accuracy": 0.55859375,
- "eval_VitaminC_max_accuracy_threshold": 414.42559814453125,
- "eval_VitaminC_max_ap": 0.5305698453165033,
- "eval_VitaminC_max_f1": 0.6542553191489362,
- "eval_VitaminC_max_f1_threshold": 415.5366516113281,
- "eval_VitaminC_max_precision": 0.48616600790513836,
  "eval_VitaminC_max_recall": 1.0,
- "eval_sequential_score": 0.5305698453165033,
- "eval_sts-test_pearson_cosine": 0.45494716382349193,
- "eval_sts-test_pearson_dot": 0.44837123659858896,
- "eval_sts-test_pearson_euclidean": 0.4480861256491879,
- "eval_sts-test_pearson_manhattan": 0.4417008219313264,
- "eval_sts-test_pearson_max": 0.45494716382349193,
- "eval_sts-test_spearman_cosine": 0.48921418507251446,
- "eval_sts-test_spearman_dot": 0.46707725062744593,
- "eval_sts-test_spearman_euclidean": 0.4610824798409968,
- "eval_sts-test_spearman_manhattan": 0.46068648052845956,
- "eval_sts-test_spearman_max": 0.48921418507251446,
- "eval_vitaminc-pairs_loss": 2.5043575763702393,
- "eval_vitaminc-pairs_runtime": 1.4778,
- "eval_vitaminc-pairs_samples_per_second": 73.079,
- "eval_vitaminc-pairs_steps_per_second": 1.353,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_negation-triplets_loss": 3.4229447841644287,
- "eval_negation-triplets_runtime": 0.2991,
- "eval_negation-triplets_samples_per_second": 213.954,
- "eval_negation-triplets_steps_per_second": 3.343,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_scitail-pairs-pos_loss": 0.2784869372844696,
- "eval_scitail-pairs-pos_runtime": 0.3633,
- "eval_scitail-pairs-pos_samples_per_second": 148.649,
- "eval_scitail-pairs-pos_steps_per_second": 2.753,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_xsum-pairs_loss": 2.428964614868164,
- "eval_xsum-pairs_runtime": 3.1548,
- "eval_xsum-pairs_samples_per_second": 40.573,
- "eval_xsum-pairs_steps_per_second": 0.634,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_sciq_pairs_loss": 0.15256048738956451,
- "eval_sciq_pairs_runtime": 3.2432,
- "eval_sciq_pairs_samples_per_second": 39.467,
- "eval_sciq_pairs_steps_per_second": 0.617,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_qasc_pairs_loss": 1.2902077436447144,
- "eval_qasc_pairs_runtime": 0.6211,
- "eval_qasc_pairs_samples_per_second": 206.085,
- "eval_qasc_pairs_steps_per_second": 3.22,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_openbookqa_pairs_loss": 2.4784862995147705,
- "eval_openbookqa_pairs_runtime": 0.5758,
- "eval_openbookqa_pairs_samples_per_second": 222.308,
- "eval_openbookqa_pairs_steps_per_second": 3.474,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_msmarco_pairs_loss": 2.967724084854126,
- "eval_msmarco_pairs_runtime": 1.2944,
- "eval_msmarco_pairs_samples_per_second": 98.885,
- "eval_msmarco_pairs_steps_per_second": 1.545,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_nq_pairs_loss": 3.358661413192749,
- "eval_nq_pairs_runtime": 2.3827,
- "eval_nq_pairs_samples_per_second": 53.722,
- "eval_nq_pairs_steps_per_second": 0.839,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
- "eval_trivia_pairs_loss": 3.1391680240631104,
- "eval_trivia_pairs_runtime": 4.4155,
- "eval_trivia_pairs_samples_per_second": 28.989,
- "eval_trivia_pairs_steps_per_second": 0.453,
  "step": 81
  },
  {
- "epoch": 0.4573041637261821,
627
- "eval_gooaq_pairs_loss": 2.8774912357330322,
628
- "eval_gooaq_pairs_runtime": 0.8746,
629
- "eval_gooaq_pairs_samples_per_second": 146.346,
630
- "eval_gooaq_pairs_steps_per_second": 2.287,
631
  "step": 81
632
  },
633
  {
634
- "epoch": 0.4573041637261821,
635
- "eval_paws-pos_loss": 0.19754411280155182,
636
- "eval_paws-pos_runtime": 0.684,
637
- "eval_paws-pos_samples_per_second": 187.141,
638
- "eval_paws-pos_steps_per_second": 2.924,
639
  "step": 81
640
  },
641
  {
642
- "epoch": 0.47424135497529996,
643
- "grad_norm": 5.149569988250732,
644
- "learning_rate": 1.9090909090909094e-05,
645
- "loss": 2.6049,
646
  "step": 84
647
  },
648
  {
649
- "epoch": 0.4911785462244178,
650
- "grad_norm": 5.012928009033203,
651
- "learning_rate": 1.9772727272727274e-05,
652
- "loss": 2.7738,
653
  "step": 87
654
  },
655
  {
656
- "epoch": 0.5081157374735357,
657
- "grad_norm": 4.880725383758545,
658
- "learning_rate": 2.0454545454545457e-05,
659
- "loss": 2.5416,
660
  "step": 90
661
  },
662
  {
663
- "epoch": 0.5250529287226535,
664
- "grad_norm": 5.618528366088867,
665
- "learning_rate": 2.113636363636364e-05,
666
- "loss": 2.3913,
667
  "step": 93
668
  },
669
  {
670
- "epoch": 0.5419901199717714,
671
- "grad_norm": 5.020515441894531,
672
- "learning_rate": 2.1818181818181818e-05,
673
- "loss": 2.3144,
674
  "step": 96
675
  },
676
  {
677
- "epoch": 0.5589273112208892,
678
- "grad_norm": 4.818451404571533,
679
- "learning_rate": 2.25e-05,
680
- "loss": 2.1857,
681
  "step": 99
682
  },
683
  {
684
- "epoch": 0.5758645024700071,
685
- "grad_norm": 5.094771385192871,
686
- "learning_rate": 2.3181818181818185e-05,
687
- "loss": 1.8881,
688
  "step": 102
689
  },
690
  {
691
- "epoch": 0.592801693719125,
692
- "grad_norm": 3.795962333679199,
693
- "learning_rate": 2.3863636363636365e-05,
694
- "loss": 2.2699,
695
  "step": 105
696
  }
697
  ],
698
  "logging_steps": 3,
699
- "max_steps": 531,
700
  "num_input_tokens_seen": 0,
701
  "num_train_epochs": 3,
702
  "save_steps": 107,
@@ -713,7 +713,7 @@
713
  }
714
  },
715
  "total_flos": 0.0,
716
- "train_batch_size": 160,
717
  "trial_name": null,
718
  "trial_params": null
719
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.601123595505618,
5
  "eval_steps": 27,
6
  "global_step": 107,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.016853932584269662,
13
+ "grad_norm": 2.9885776042938232,
14
+ "learning_rate": 1.4814814814814815e-06,
15
+ "loss": 0.6012,
16
  "step": 3
17
  },
18
  {
19
+ "epoch": 0.033707865168539325,
20
+ "grad_norm": 3.184929132461548,
21
+ "learning_rate": 2.962962962962963e-06,
22
+ "loss": 0.7573,
23
  "step": 6
24
  },
25
  {
26
+ "epoch": 0.05056179775280899,
27
+ "grad_norm": 3.256159782409668,
28
+ "learning_rate": 4.444444444444444e-06,
29
+ "loss": 0.9212,
30
  "step": 9
31
  },
32
  {
33
+ "epoch": 0.06741573033707865,
34
+ "grad_norm": 2.833339214324951,
35
+ "learning_rate": 5.925925925925926e-06,
36
+ "loss": 0.6117,
37
  "step": 12
38
  },
39
  {
40
+ "epoch": 0.08426966292134831,
41
+ "grad_norm": 3.08292818069458,
42
+ "learning_rate": 7.4074074074074075e-06,
43
+ "loss": 0.8545,
44
  "step": 15
45
  },
46
  {
47
+ "epoch": 0.10112359550561797,
48
+ "grad_norm": 2.317431688308716,
49
+ "learning_rate": 8.888888888888888e-06,
50
+ "loss": 0.6515,
51
  "step": 18
52
  },
53
  {
54
+ "epoch": 0.11797752808988764,
55
+ "grad_norm": 2.9611644744873047,
56
+ "learning_rate": 1.037037037037037e-05,
57
+ "loss": 0.7159,
58
  "step": 21
59
  },
60
  {
61
+ "epoch": 0.1348314606741573,
62
+ "grad_norm": 2.698537826538086,
63
+ "learning_rate": 1.1851851851851852e-05,
64
+ "loss": 0.7019,
65
  "step": 24
66
  },
67
  {
68
+ "epoch": 0.15168539325842698,
69
+ "grad_norm": 2.222154378890991,
70
+ "learning_rate": 1.3333333333333333e-05,
71
+ "loss": 0.4411,
72
  "step": 27
73
  },
74
  {
75
+ "epoch": 0.15168539325842698,
76
  "eval_NLI-v2_cosine_accuracy": 1.0,
77
+ "eval_NLI-v2_dot_accuracy": 0.0,
78
  "eval_NLI-v2_euclidean_accuracy": 1.0,
79
  "eval_NLI-v2_manhattan_accuracy": 1.0,
80
  "eval_NLI-v2_max_accuracy": 1.0,
81
+ "eval_VitaminC_cosine_accuracy": 0.578125,
82
+ "eval_VitaminC_cosine_accuracy_threshold": 0.7817381620407104,
83
+ "eval_VitaminC_cosine_ap": 0.5507972943944112,
84
+ "eval_VitaminC_cosine_f1": 0.6595174262734584,
85
+ "eval_VitaminC_cosine_f1_threshold": 0.28573715686798096,
86
+ "eval_VitaminC_cosine_precision": 0.492,
87
  "eval_VitaminC_cosine_recall": 1.0,
88
+ "eval_VitaminC_dot_accuracy": 0.5703125,
89
+ "eval_VitaminC_dot_accuracy_threshold": 316.7283020019531,
90
+ "eval_VitaminC_dot_ap": 0.5511866185449577,
91
+ "eval_VitaminC_dot_f1": 0.6577540106951871,
92
+ "eval_VitaminC_dot_f1_threshold": 106.75863647460938,
93
+ "eval_VitaminC_dot_precision": 0.4900398406374502,
94
  "eval_VitaminC_dot_recall": 1.0,
95
+ "eval_VitaminC_euclidean_accuracy": 0.578125,
96
+ "eval_VitaminC_euclidean_accuracy_threshold": 13.298419952392578,
97
+ "eval_VitaminC_euclidean_ap": 0.5476323986807207,
98
+ "eval_VitaminC_euclidean_f1": 0.6577540106951871,
99
+ "eval_VitaminC_euclidean_f1_threshold": 23.83933448791504,
100
+ "eval_VitaminC_euclidean_precision": 0.4900398406374502,
101
+ "eval_VitaminC_euclidean_recall": 1.0,
102
+ "eval_VitaminC_manhattan_accuracy": 0.578125,
103
+ "eval_VitaminC_manhattan_accuracy_threshold": 279.69085693359375,
104
+ "eval_VitaminC_manhattan_ap": 0.5412538781107805,
105
+ "eval_VitaminC_manhattan_f1": 0.6577540106951871,
106
+ "eval_VitaminC_manhattan_f1_threshold": 499.8836364746094,
107
+ "eval_VitaminC_manhattan_precision": 0.4900398406374502,
108
  "eval_VitaminC_manhattan_recall": 1.0,
109
+ "eval_VitaminC_max_accuracy": 0.578125,
110
+ "eval_VitaminC_max_accuracy_threshold": 316.7283020019531,
111
+ "eval_VitaminC_max_ap": 0.5511866185449577,
112
+ "eval_VitaminC_max_f1": 0.6595174262734584,
113
+ "eval_VitaminC_max_f1_threshold": 499.8836364746094,
114
+ "eval_VitaminC_max_precision": 0.492,
115
  "eval_VitaminC_max_recall": 1.0,
116
+ "eval_sequential_score": 0.5511866185449577,
117
+ "eval_sts-test_pearson_cosine": 0.8488243436029344,
118
+ "eval_sts-test_pearson_dot": 0.8480167969551653,
119
+ "eval_sts-test_pearson_euclidean": 0.8800283985117625,
120
+ "eval_sts-test_pearson_manhattan": 0.880588311422627,
121
+ "eval_sts-test_pearson_max": 0.880588311422627,
122
+ "eval_sts-test_spearman_cosine": 0.8905659331642088,
123
+ "eval_sts-test_spearman_dot": 0.8692084657204004,
124
+ "eval_sts-test_spearman_euclidean": 0.8809566840232712,
125
+ "eval_sts-test_spearman_manhattan": 0.883434007028195,
126
+ "eval_sts-test_spearman_max": 0.8905659331642088,
127
+ "eval_vitaminc-pairs_loss": 2.465860366821289,
128
+ "eval_vitaminc-pairs_runtime": 1.4615,
129
+ "eval_vitaminc-pairs_samples_per_second": 73.899,
130
+ "eval_vitaminc-pairs_steps_per_second": 1.368,
131
  "step": 27
132
  },
133
  {
134
+ "epoch": 0.15168539325842698,
135
+ "eval_negation-triplets_loss": 1.7310789823532104,
136
+ "eval_negation-triplets_runtime": 0.3009,
137
+ "eval_negation-triplets_samples_per_second": 212.692,
138
+ "eval_negation-triplets_steps_per_second": 3.323,
139
  "step": 27
140
  },
141
  {
142
+ "epoch": 0.15168539325842698,
143
+ "eval_scitail-pairs-pos_loss": 0.1150394082069397,
144
+ "eval_scitail-pairs-pos_runtime": 0.3739,
145
+ "eval_scitail-pairs-pos_samples_per_second": 144.431,
146
+ "eval_scitail-pairs-pos_steps_per_second": 2.675,
147
  "step": 27
148
  },
149
  {
150
+ "epoch": 0.15168539325842698,
151
+ "eval_xsum-pairs_loss": 0.11168850213289261,
152
+ "eval_xsum-pairs_runtime": 3.1697,
153
+ "eval_xsum-pairs_samples_per_second": 40.382,
154
+ "eval_xsum-pairs_steps_per_second": 0.631,
155
  "step": 27
156
  },
157
  {
158
+ "epoch": 0.15168539325842698,
159
+ "eval_sciq_pairs_loss": 0.03450964391231537,
160
+ "eval_sciq_pairs_runtime": 3.3283,
161
+ "eval_sciq_pairs_samples_per_second": 38.459,
162
+ "eval_sciq_pairs_steps_per_second": 0.601,
163
  "step": 27
164
  },
165
  {
166
+ "epoch": 0.15168539325842698,
167
+ "eval_qasc_pairs_loss": 0.11095743626356125,
168
+ "eval_qasc_pairs_runtime": 0.6261,
169
+ "eval_qasc_pairs_samples_per_second": 204.45,
170
+ "eval_qasc_pairs_steps_per_second": 3.195,
171
  "step": 27
172
  },
173
  {
174
+ "epoch": 0.15168539325842698,
175
+ "eval_openbookqa_pairs_loss": 0.7092063426971436,
176
+ "eval_openbookqa_pairs_runtime": 0.5866,
177
+ "eval_openbookqa_pairs_samples_per_second": 218.19,
178
+ "eval_openbookqa_pairs_steps_per_second": 3.409,
179
  "step": 27
180
  },
181
  {
182
+ "epoch": 0.15168539325842698,
183
+ "eval_msmarco_pairs_loss": 0.3955218493938446,
184
+ "eval_msmarco_pairs_runtime": 1.2942,
185
+ "eval_msmarco_pairs_samples_per_second": 98.902,
186
+ "eval_msmarco_pairs_steps_per_second": 1.545,
187
  "step": 27
188
  },
189
  {
190
+ "epoch": 0.15168539325842698,
191
+ "eval_nq_pairs_loss": 0.42051073908805847,
192
+ "eval_nq_pairs_runtime": 2.3875,
193
+ "eval_nq_pairs_samples_per_second": 53.612,
194
+ "eval_nq_pairs_steps_per_second": 0.838,
195
  "step": 27
196
  },
197
  {
198
+ "epoch": 0.15168539325842698,
199
+ "eval_trivia_pairs_loss": 0.93178790807724,
200
+ "eval_trivia_pairs_runtime": 4.4363,
201
+ "eval_trivia_pairs_samples_per_second": 28.853,
202
+ "eval_trivia_pairs_steps_per_second": 0.451,
203
  "step": 27
204
  },
205
  {
206
+ "epoch": 0.15168539325842698,
207
+ "eval_gooaq_pairs_loss": 0.6505913138389587,
208
+ "eval_gooaq_pairs_runtime": 0.8826,
209
+ "eval_gooaq_pairs_samples_per_second": 145.027,
210
+ "eval_gooaq_pairs_steps_per_second": 2.266,
211
  "step": 27
212
  },
213
  {
214
+ "epoch": 0.15168539325842698,
215
+ "eval_paws-pos_loss": 0.024931101128458977,
216
+ "eval_paws-pos_runtime": 0.6852,
217
+ "eval_paws-pos_samples_per_second": 186.805,
218
+ "eval_paws-pos_steps_per_second": 2.919,
219
  "step": 27
220
  },
221
  {
222
+ "epoch": 0.16853932584269662,
223
+ "grad_norm": 2.826900005340576,
224
+ "learning_rate": 1.4814814814814815e-05,
225
+ "loss": 0.5125,
226
  "step": 30
227
  },
228
  {
229
+ "epoch": 0.1853932584269663,
230
+ "grad_norm": 2.9938910007476807,
231
+ "learning_rate": 1.6296296296296297e-05,
232
+ "loss": 0.6885,
233
  "step": 33
234
  },
235
  {
236
+ "epoch": 0.20224719101123595,
237
+ "grad_norm": 3.3046395778656006,
238
+ "learning_rate": 1.7777777777777777e-05,
239
+ "loss": 0.6435,
240
  "step": 36
241
  },
242
  {
243
+ "epoch": 0.21910112359550563,
244
+ "grad_norm": 2.4184651374816895,
245
+ "learning_rate": 1.925925925925926e-05,
246
+ "loss": 0.753,
247
  "step": 39
248
  },
249
  {
250
+ "epoch": 0.23595505617977527,
251
+ "grad_norm": 2.9905433654785156,
252
+ "learning_rate": 2.074074074074074e-05,
253
+ "loss": 0.7427,
254
  "step": 42
255
  },
256
  {
257
+ "epoch": 0.25280898876404495,
258
+ "grad_norm": 2.745820999145508,
259
+ "learning_rate": 2.2222222222222227e-05,
260
+ "loss": 0.5083,
261
  "step": 45
262
  },
263
  {
264
+ "epoch": 0.2696629213483146,
265
+ "grad_norm": 2.6370577812194824,
266
+ "learning_rate": 2.3703703703703703e-05,
267
+ "loss": 0.7454,
268
  "step": 48
269
  },
270
  {
271
+ "epoch": 0.28651685393258425,
272
+ "grad_norm": 3.044011116027832,
273
+ "learning_rate": 2.5185185185185187e-05,
274
+ "loss": 0.8356,
275
  "step": 51
276
  },
277
  {
278
+ "epoch": 0.30337078651685395,
279
+ "grad_norm": 3.718804121017456,
280
+ "learning_rate": 2.6666666666666667e-05,
281
+ "loss": 0.8864,
282
  "step": 54
283
  },
284
  {
285
+ "epoch": 0.30337078651685395,
286
  "eval_NLI-v2_cosine_accuracy": 1.0,
287
+ "eval_NLI-v2_dot_accuracy": 0.0,
288
  "eval_NLI-v2_euclidean_accuracy": 1.0,
289
  "eval_NLI-v2_manhattan_accuracy": 1.0,
290
  "eval_NLI-v2_max_accuracy": 1.0,
291
+ "eval_VitaminC_cosine_accuracy": 0.57421875,
292
+ "eval_VitaminC_cosine_accuracy_threshold": 0.7991844415664673,
293
+ "eval_VitaminC_cosine_ap": 0.5485498837322925,
294
+ "eval_VitaminC_cosine_f1": 0.6595174262734584,
295
+ "eval_VitaminC_cosine_f1_threshold": 0.3160865008831024,
296
+ "eval_VitaminC_cosine_precision": 0.492,
297
+ "eval_VitaminC_cosine_recall": 1.0,
298
+ "eval_VitaminC_dot_accuracy": 0.578125,
299
+ "eval_VitaminC_dot_accuracy_threshold": 327.0416564941406,
300
+ "eval_VitaminC_dot_ap": 0.54993134882601,
301
+ "eval_VitaminC_dot_f1": 0.6595174262734584,
302
+ "eval_VitaminC_dot_f1_threshold": 117.44181060791016,
303
+ "eval_VitaminC_dot_precision": 0.492,
304
+ "eval_VitaminC_dot_recall": 1.0,
305
+ "eval_VitaminC_euclidean_accuracy": 0.57421875,
306
+ "eval_VitaminC_euclidean_accuracy_threshold": 13.019258499145508,
307
+ "eval_VitaminC_euclidean_ap": 0.5435066540334542,
308
+ "eval_VitaminC_euclidean_f1": 0.6577540106951871,
309
+ "eval_VitaminC_euclidean_f1_threshold": 23.688644409179688,
310
+ "eval_VitaminC_euclidean_precision": 0.4900398406374502,
311
+ "eval_VitaminC_euclidean_recall": 1.0,
312
+ "eval_VitaminC_manhattan_accuracy": 0.57421875,
313
+ "eval_VitaminC_manhattan_accuracy_threshold": 283.876220703125,
314
+ "eval_VitaminC_manhattan_ap": 0.5416615397828658,
315
+ "eval_VitaminC_manhattan_f1": 0.6559999999999999,
316
+ "eval_VitaminC_manhattan_f1_threshold": 514.0216064453125,
317
+ "eval_VitaminC_manhattan_precision": 0.4880952380952381,
318
+ "eval_VitaminC_manhattan_recall": 1.0,
319
+ "eval_VitaminC_max_accuracy": 0.578125,
320
+ "eval_VitaminC_max_accuracy_threshold": 327.0416564941406,
321
+ "eval_VitaminC_max_ap": 0.54993134882601,
322
+ "eval_VitaminC_max_f1": 0.6595174262734584,
323
+ "eval_VitaminC_max_f1_threshold": 514.0216064453125,
324
+ "eval_VitaminC_max_precision": 0.492,
325
+ "eval_VitaminC_max_recall": 1.0,
326
+ "eval_sequential_score": 0.54993134882601,
327
+ "eval_sts-test_pearson_cosine": 0.8452615878553369,
328
+ "eval_sts-test_pearson_dot": 0.8404858620687519,
329
+ "eval_sts-test_pearson_euclidean": 0.8780527810910925,
330
+ "eval_sts-test_pearson_manhattan": 0.878916157345712,
331
+ "eval_sts-test_pearson_max": 0.878916157345712,
332
+ "eval_sts-test_spearman_cosine": 0.8876915367075635,
333
+ "eval_sts-test_spearman_dot": 0.8608104875327304,
334
+ "eval_sts-test_spearman_euclidean": 0.8804138856889071,
335
+ "eval_sts-test_spearman_manhattan": 0.8822803815444743,
336
+ "eval_sts-test_spearman_max": 0.8876915367075635,
337
+ "eval_vitaminc-pairs_loss": 2.454524040222168,
338
+ "eval_vitaminc-pairs_runtime": 1.4583,
339
+ "eval_vitaminc-pairs_samples_per_second": 74.057,
340
+ "eval_vitaminc-pairs_steps_per_second": 1.371,
341
  "step": 54
342
  },
343
  {
344
+ "epoch": 0.30337078651685395,
345
+ "eval_negation-triplets_loss": 1.7277792692184448,
346
+ "eval_negation-triplets_runtime": 0.3027,
347
+ "eval_negation-triplets_samples_per_second": 211.436,
348
+ "eval_negation-triplets_steps_per_second": 3.304,
349
  "step": 54
350
  },
351
  {
352
+ "epoch": 0.30337078651685395,
353
+ "eval_scitail-pairs-pos_loss": 0.11168555170297623,
354
+ "eval_scitail-pairs-pos_runtime": 0.3726,
355
+ "eval_scitail-pairs-pos_samples_per_second": 144.911,
356
+ "eval_scitail-pairs-pos_steps_per_second": 2.684,
357
  "step": 54
358
  },
359
  {
360
+ "epoch": 0.30337078651685395,
361
+ "eval_xsum-pairs_loss": 0.10087604075670242,
362
+ "eval_xsum-pairs_runtime": 3.1701,
363
+ "eval_xsum-pairs_samples_per_second": 40.377,
364
+ "eval_xsum-pairs_steps_per_second": 0.631,
365
  "step": 54
366
  },
367
  {
368
+ "epoch": 0.30337078651685395,
369
+ "eval_sciq_pairs_loss": 0.03466618433594704,
370
+ "eval_sciq_pairs_runtime": 3.3778,
371
+ "eval_sciq_pairs_samples_per_second": 37.895,
372
+ "eval_sciq_pairs_steps_per_second": 0.592,
373
  "step": 54
374
  },
375
  {
376
+ "epoch": 0.30337078651685395,
377
+ "eval_qasc_pairs_loss": 0.10551701486110687,
378
+ "eval_qasc_pairs_runtime": 0.6271,
379
+ "eval_qasc_pairs_samples_per_second": 204.125,
380
+ "eval_qasc_pairs_steps_per_second": 3.189,
381
  "step": 54
382
  },
383
  {
384
+ "epoch": 0.30337078651685395,
385
+ "eval_openbookqa_pairs_loss": 0.7239958643913269,
386
+ "eval_openbookqa_pairs_runtime": 0.5811,
387
+ "eval_openbookqa_pairs_samples_per_second": 220.255,
388
+ "eval_openbookqa_pairs_steps_per_second": 3.441,
389
  "step": 54
390
  },
391
  {
392
+ "epoch": 0.30337078651685395,
393
+ "eval_msmarco_pairs_loss": 0.3808779716491699,
394
+ "eval_msmarco_pairs_runtime": 1.2919,
395
+ "eval_msmarco_pairs_samples_per_second": 99.082,
396
+ "eval_msmarco_pairs_steps_per_second": 1.548,
397
  "step": 54
398
  },
399
  {
400
+ "epoch": 0.30337078651685395,
401
+ "eval_nq_pairs_loss": 0.44170400500297546,
402
+ "eval_nq_pairs_runtime": 2.3835,
403
+ "eval_nq_pairs_samples_per_second": 53.703,
404
+ "eval_nq_pairs_steps_per_second": 0.839,
405
  "step": 54
406
  },
407
  {
408
+ "epoch": 0.30337078651685395,
409
+ "eval_trivia_pairs_loss": 0.9158428907394409,
410
+ "eval_trivia_pairs_runtime": 4.4326,
411
+ "eval_trivia_pairs_samples_per_second": 28.877,
412
+ "eval_trivia_pairs_steps_per_second": 0.451,
413
  "step": 54
414
  },
415
  {
416
+ "epoch": 0.30337078651685395,
417
+ "eval_gooaq_pairs_loss": 0.6208247542381287,
418
+ "eval_gooaq_pairs_runtime": 0.8797,
419
+ "eval_gooaq_pairs_samples_per_second": 145.497,
420
+ "eval_gooaq_pairs_steps_per_second": 2.273,
421
  "step": 54
422
  },
423
  {
424
+ "epoch": 0.30337078651685395,
425
+ "eval_paws-pos_loss": 0.02517784759402275,
426
+ "eval_paws-pos_runtime": 0.694,
427
+ "eval_paws-pos_samples_per_second": 184.442,
428
+ "eval_paws-pos_steps_per_second": 2.882,
429
  "step": 54
430
  },
431
  {
432
+ "epoch": 0.3202247191011236,
433
+ "grad_norm": 2.173736572265625,
434
+ "learning_rate": 2.814814814814815e-05,
435
+ "loss": 0.6015,
436
  "step": 57
437
  },
438
  {
439
+ "epoch": 0.33707865168539325,
440
+ "grad_norm": 3.8964712619781494,
441
+ "learning_rate": 2.962962962962963e-05,
442
+ "loss": 0.9482,
443
  "step": 60
444
  },
445
  {
446
+ "epoch": 0.3539325842696629,
447
+ "grad_norm": 2.659498691558838,
448
+ "learning_rate": 3.111111111111112e-05,
449
+ "loss": 0.5404,
450
  "step": 63
451
  },
452
  {
453
+ "epoch": 0.3707865168539326,
454
+ "grad_norm": 3.3499844074249268,
455
+ "learning_rate": 3.259259259259259e-05,
456
+ "loss": 0.805,
457
  "step": 66
458
  },
459
  {
460
+ "epoch": 0.38764044943820225,
461
+ "grad_norm": 3.770142078399658,
462
+ "learning_rate": 3.4074074074074077e-05,
463
+ "loss": 0.7184,
464
  "step": 69
465
  },
466
  {
467
+ "epoch": 0.4044943820224719,
468
+ "grad_norm": 3.740880012512207,
469
+ "learning_rate": 3.555555555555555e-05,
470
+ "loss": 0.8708,
471
  "step": 72
472
  },
473
  {
474
+ "epoch": 0.42134831460674155,
475
+ "grad_norm": 2.981106996536255,
476
+ "learning_rate": 3.703703703703704e-05,
477
+ "loss": 0.8327,
478
  "step": 75
479
  },
480
  {
481
+ "epoch": 0.43820224719101125,
482
+ "grad_norm": 2.3469011783599854,
483
+ "learning_rate": 3.851851851851852e-05,
484
+ "loss": 0.5025,
485
  "step": 78
486
  },
487
  {
488
+ "epoch": 0.4550561797752809,
489
+ "grad_norm": 3.296035051345825,
490
+ "learning_rate": 4e-05,
491
+ "loss": 0.6517,
492
  "step": 81
493
  },
494
  {
495
+ "epoch": 0.4550561797752809,
496
  "eval_NLI-v2_cosine_accuracy": 1.0,
497
  "eval_NLI-v2_dot_accuracy": 0.0,
498
  "eval_NLI-v2_euclidean_accuracy": 1.0,
499
  "eval_NLI-v2_manhattan_accuracy": 1.0,
500
  "eval_NLI-v2_max_accuracy": 1.0,
501
+ "eval_VitaminC_cosine_accuracy": 0.578125,
502
+ "eval_VitaminC_cosine_accuracy_threshold": 0.7859437465667725,
503
+ "eval_VitaminC_cosine_ap": 0.5557444337961499,
504
+ "eval_VitaminC_cosine_f1": 0.6595174262734584,
505
+ "eval_VitaminC_cosine_f1_threshold": 0.3211573362350464,
506
+ "eval_VitaminC_cosine_precision": 0.492,
507
  "eval_VitaminC_cosine_recall": 1.0,
508
+ "eval_VitaminC_dot_accuracy": 0.578125,
509
+ "eval_VitaminC_dot_accuracy_threshold": 315.9444580078125,
510
+ "eval_VitaminC_dot_ap": 0.5539524528858992,
511
+ "eval_VitaminC_dot_f1": 0.6595174262734584,
512
+ "eval_VitaminC_dot_f1_threshold": 129.88558959960938,
513
+ "eval_VitaminC_dot_precision": 0.492,
514
  "eval_VitaminC_dot_recall": 1.0,
515
+ "eval_VitaminC_euclidean_accuracy": 0.58203125,
516
+ "eval_VitaminC_euclidean_accuracy_threshold": 13.113249778747559,
517
+ "eval_VitaminC_euclidean_ap": 0.5510190217865811,
518
+ "eval_VitaminC_euclidean_f1": 0.6577540106951871,
519
+ "eval_VitaminC_euclidean_f1_threshold": 23.90462303161621,
520
+ "eval_VitaminC_euclidean_precision": 0.4900398406374502,
521
  "eval_VitaminC_euclidean_recall": 1.0,
522
+ "eval_VitaminC_manhattan_accuracy": 0.578125,
523
+ "eval_VitaminC_manhattan_accuracy_threshold": 276.40142822265625,
524
+ "eval_VitaminC_manhattan_ap": 0.5429240708188645,
525
+ "eval_VitaminC_manhattan_f1": 0.6576819407008085,
526
+ "eval_VitaminC_manhattan_f1_threshold": 469.7353515625,
527
+ "eval_VitaminC_manhattan_precision": 0.49193548387096775,
528
+ "eval_VitaminC_manhattan_recall": 0.991869918699187,
529
+ "eval_VitaminC_max_accuracy": 0.58203125,
530
+ "eval_VitaminC_max_accuracy_threshold": 315.9444580078125,
531
+ "eval_VitaminC_max_ap": 0.5557444337961499,
532
+ "eval_VitaminC_max_f1": 0.6595174262734584,
533
+ "eval_VitaminC_max_f1_threshold": 469.7353515625,
534
+ "eval_VitaminC_max_precision": 0.492,
535
  "eval_VitaminC_max_recall": 1.0,
536
+ "eval_sequential_score": 0.5557444337961499,
537
+ "eval_sts-test_pearson_cosine": 0.8483316632682467,
538
+ "eval_sts-test_pearson_dot": 0.8392403098680445,
539
+ "eval_sts-test_pearson_euclidean": 0.8814283057813619,
540
+ "eval_sts-test_pearson_manhattan": 0.8815226866327923,
541
+ "eval_sts-test_pearson_max": 0.8815226866327923,
542
+ "eval_sts-test_spearman_cosine": 0.8903503892346,
543
+ "eval_sts-test_spearman_dot": 0.857844431199042,
544
+ "eval_sts-test_spearman_euclidean": 0.8851830636663006,
545
+ "eval_sts-test_spearman_manhattan": 0.8865568876827619,
546
+ "eval_sts-test_spearman_max": 0.8903503892346,
547
+ "eval_vitaminc-pairs_loss": 2.3538782596588135,
548
+ "eval_vitaminc-pairs_runtime": 1.4618,
549
+ "eval_vitaminc-pairs_samples_per_second": 73.88,
550
+ "eval_vitaminc-pairs_steps_per_second": 1.368,
551
  "step": 81
552
  },
553
  {
554
+ "epoch": 0.4550561797752809,
555
+ "eval_negation-triplets_loss": 1.649215579032898,
556
+ "eval_negation-triplets_runtime": 0.3081,
557
+ "eval_negation-triplets_samples_per_second": 207.723,
558
+ "eval_negation-triplets_steps_per_second": 3.246,
559
  "step": 81
560
  },
561
  {
562
+ "epoch": 0.4550561797752809,
563
+ "eval_scitail-pairs-pos_loss": 0.11823470890522003,
564
+ "eval_scitail-pairs-pos_runtime": 0.376,
565
+ "eval_scitail-pairs-pos_samples_per_second": 143.616,
566
+ "eval_scitail-pairs-pos_steps_per_second": 2.66,
567
  "step": 81
568
  },
569
  {
570
+ "epoch": 0.4550561797752809,
571
+ "eval_xsum-pairs_loss": 0.08420603722333908,
572
+ "eval_xsum-pairs_runtime": 3.1576,
573
+ "eval_xsum-pairs_samples_per_second": 40.538,
574
+ "eval_xsum-pairs_steps_per_second": 0.633,
575
  "step": 81
576
  },
577
  {
578
+ "epoch": 0.4550561797752809,
579
+ "eval_sciq_pairs_loss": 0.034781794995069504,
580
+ "eval_sciq_pairs_runtime": 3.2597,
581
+ "eval_sciq_pairs_samples_per_second": 39.267,
582
+ "eval_sciq_pairs_steps_per_second": 0.614,
583
  "step": 81
584
  },
585
  {
586
+ "epoch": 0.4550561797752809,
587
+ "eval_qasc_pairs_loss": 0.10597346723079681,
588
+ "eval_qasc_pairs_runtime": 0.6245,
589
+ "eval_qasc_pairs_samples_per_second": 204.979,
590
+ "eval_qasc_pairs_steps_per_second": 3.203,
591
  "step": 81
592
  },
593
  {
594
+ "epoch": 0.4550561797752809,
595
+ "eval_openbookqa_pairs_loss": 0.7160983681678772,
596
+ "eval_openbookqa_pairs_runtime": 0.5767,
597
+ "eval_openbookqa_pairs_samples_per_second": 221.961,
598
+ "eval_openbookqa_pairs_steps_per_second": 3.468,
599
  "step": 81
600
  },
601
  {
602
+ "epoch": 0.4550561797752809,
603
+ "eval_msmarco_pairs_loss": 0.3454173803329468,
604
+ "eval_msmarco_pairs_runtime": 1.2912,
605
+ "eval_msmarco_pairs_samples_per_second": 99.134,
606
+ "eval_msmarco_pairs_steps_per_second": 1.549,
607
  "step": 81
608
  },
609
  {
610
+ "epoch": 0.4550561797752809,
611
+ "eval_nq_pairs_loss": 0.4442503750324249,
612
+ "eval_nq_pairs_runtime": 2.3854,
613
+ "eval_nq_pairs_samples_per_second": 53.659,
614
+ "eval_nq_pairs_steps_per_second": 0.838,
615
  "step": 81
616
  },
617
  {
618
+ "epoch": 0.4550561797752809,
619
+ "eval_trivia_pairs_loss": 0.9324482679367065,
620
+ "eval_trivia_pairs_runtime": 4.4251,
621
+ "eval_trivia_pairs_samples_per_second": 28.926,
622
+ "eval_trivia_pairs_steps_per_second": 0.452,
623
  "step": 81
624
  },
625
  {
626
+ "epoch": 0.4550561797752809,
627
+ "eval_gooaq_pairs_loss": 0.6094165444374084,
628
+ "eval_gooaq_pairs_runtime": 0.8751,
629
+ "eval_gooaq_pairs_samples_per_second": 146.261,
630
+ "eval_gooaq_pairs_steps_per_second": 2.285,
631
  "step": 81
632
  },
633
  {
634
+ "epoch": 0.4550561797752809,
635
+ "eval_paws-pos_loss": 0.024421451613307,
636
+ "eval_paws-pos_runtime": 0.6865,
637
+ "eval_paws-pos_samples_per_second": 186.444,
638
+ "eval_paws-pos_steps_per_second": 2.913,
639
  "step": 81
640
  },
641
  {
642
+ "epoch": 0.47191011235955055,
643
+ "grad_norm": 3.1395561695098877,
644
+ "learning_rate": 3.999675367909485e-05,
645
+ "loss": 0.5801,
646
  "step": 84
647
  },
648
  {
649
+ "epoch": 0.4887640449438202,
650
+ "grad_norm": 2.7977917194366455,
651
+ "learning_rate": 3.998701612152597e-05,
652
+ "loss": 0.791,
653
  "step": 87
654
  },
655
  {
656
+ "epoch": 0.5056179775280899,
657
+ "grad_norm": 2.3682048320770264,
658
+ "learning_rate": 3.997079154212493e-05,
659
+ "loss": 0.6042,
660
  "step": 90
661
  },
662
  {
663
+ "epoch": 0.5224719101123596,
664
+ "grad_norm": 2.843482255935669,
665
+ "learning_rate": 3.99480869635839e-05,
666
+ "loss": 0.7559,
667
  "step": 93
668
  },
669
  {
670
+ "epoch": 0.5393258426966292,
671
+ "grad_norm": 2.7346785068511963,
672
+ "learning_rate": 3.9918912213415936e-05,
673
+ "loss": 0.6258,
674
  "step": 96
675
  },
676
  {
677
+ "epoch": 0.5561797752808989,
678
+ "grad_norm": 3.149007558822632,
679
+ "learning_rate": 3.9883279919701226e-05,
680
+ "loss": 0.8853,
681
  "step": 99
682
  },
683
  {
684
+ "epoch": 0.5730337078651685,
685
+ "grad_norm": 3.3424761295318604,
686
+ "learning_rate": 3.9841205505621106e-05,
687
+ "loss": 0.5947,
688
  "step": 102
689
  },
690
  {
691
+ "epoch": 0.5898876404494382,
692
+ "grad_norm": 2.6377146244049072,
693
+ "learning_rate": 3.979270718278224e-05,
694
+ "loss": 0.644,
695
  "step": 105
696
  }
697
  ],
698
  "logging_steps": 3,
699
+ "max_steps": 534,
700
  "num_input_tokens_seen": 0,
701
  "num_train_epochs": 3,
702
  "save_steps": 107,
@@ -713,7 +713,7 @@
713
  }
714
  },
715
  "total_flos": 0.0,
716
+ "train_batch_size": 320,
717
  "trial_name": null,
718
  "trial_params": null
719
  }
checkpoint-107/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:526057a01875662c89243f2f1101012515e0f154fca67a38aba3fb44dcb2d6d0
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d6ebbf0ffc45e3199e7e67afe865d0f054853a38220ea09a039bd30fc6a761
3
  size 5688