GlycerinLOL commited on
Commit
e327a29
1 Parent(s): cc096d5

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +233 -25
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 19.99,
3
  "predict_f1": 0.9105,
4
  "predict_gen_len": 27.065819657667195,
5
  "predict_loss": 1.6921558380126953,
@@ -13,8 +13,8 @@
13
  "predict_samples": 11334,
14
  "predict_samples_per_second": 7.383,
15
  "predict_steps_per_second": 0.462,
16
- "train_loss": 0.3883497776129307,
17
- "train_runtime": 10238.8851,
18
- "train_samples_per_second": 97.667,
19
- "train_steps_per_second": 0.762
20
  }
 
1
  {
2
+ "epoch": 29.99,
3
  "predict_f1": 0.9105,
4
  "predict_gen_len": 27.065819657667195,
5
  "predict_loss": 1.6921558380126953,
 
13
  "predict_samples": 11334,
14
  "predict_samples_per_second": 7.383,
15
  "predict_steps_per_second": 0.462,
16
+ "train_loss": 0.37363835000584267,
17
+ "train_runtime": 14987.4083,
18
+ "train_samples_per_second": 100.084,
19
+ "train_steps_per_second": 0.781
20
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 19.99,
3
- "train_loss": 0.3883497776129307,
4
- "train_runtime": 10238.8851,
5
- "train_samples_per_second": 97.667,
6
- "train_steps_per_second": 0.762
7
  }
 
1
  {
2
+ "epoch": 29.99,
3
+ "train_loss": 0.37363835000584267,
4
+ "train_runtime": 14987.4083,
5
+ "train_samples_per_second": 100.084,
6
+ "train_steps_per_second": 0.781
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.990403071017276,
5
  "eval_steps": 500,
6
- "global_step": 7800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -403,37 +403,245 @@
403
  "step": 7500
404
  },
405
  {
406
- "epoch": 19.99,
407
- "eval_f1": 0.9099,
408
- "eval_gen_len": 26.541090909090908,
409
- "eval_loss": 1.6258454322814941,
410
- "eval_precision": 0.9108,
411
- "eval_recall": 0.9093,
412
- "eval_rouge1": 0.4708,
413
- "eval_rouge2": 0.2214,
414
- "eval_rougeL": 0.3861,
415
- "eval_rougeLsum": 0.3863,
416
- "eval_runtime": 510.6936,
417
- "eval_samples_per_second": 5.385,
418
- "eval_steps_per_second": 0.337,
419
  "step": 7800
420
  },
421
  {
422
- "epoch": 19.99,
423
- "step": 7800,
424
- "total_flos": 1.441981436981674e+18,
425
- "train_loss": 0.3883497776129307,
426
- "train_runtime": 10238.8851,
427
- "train_samples_per_second": 97.667,
428
- "train_steps_per_second": 0.762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  }
430
  ],
431
  "logging_steps": 500,
432
- "max_steps": 7800,
433
  "num_input_tokens_seen": 0,
434
- "num_train_epochs": 20,
435
  "save_steps": 500,
436
- "total_flos": 1.441981436981674e+18,
437
  "train_batch_size": 32,
438
  "trial_name": null,
439
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 29.986564299424185,
5
  "eval_steps": 500,
6
+ "global_step": 11700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
403
  "step": 7500
404
  },
405
  {
406
+ "epoch": 20.0,
407
+ "eval_f1": 0.91,
408
+ "eval_gen_len": 26.42509090909091,
409
+ "eval_loss": 1.620501160621643,
410
+ "eval_precision": 0.9111,
411
+ "eval_recall": 0.9092,
412
+ "eval_rouge1": 0.4727,
413
+ "eval_rouge2": 0.2223,
414
+ "eval_rougeL": 0.3876,
415
+ "eval_rougeLsum": 0.3876,
416
+ "eval_runtime": 508.7027,
417
+ "eval_samples_per_second": 5.406,
418
+ "eval_steps_per_second": 0.338,
419
  "step": 7800
420
  },
421
  {
422
+ "epoch": 20.51,
423
+ "learning_rate": 6.324786324786325e-06,
424
+ "loss": 1.6692,
425
+ "step": 8000
426
+ },
427
+ {
428
+ "epoch": 21.0,
429
+ "eval_f1": 0.9102,
430
+ "eval_gen_len": 26.748363636363635,
431
+ "eval_loss": 1.6152820587158203,
432
+ "eval_precision": 0.911,
433
+ "eval_recall": 0.9098,
434
+ "eval_rouge1": 0.4737,
435
+ "eval_rouge2": 0.2229,
436
+ "eval_rougeL": 0.388,
437
+ "eval_rougeLsum": 0.388,
438
+ "eval_runtime": 510.0415,
439
+ "eval_samples_per_second": 5.392,
440
+ "eval_steps_per_second": 0.337,
441
+ "step": 8191
442
+ },
443
+ {
444
+ "epoch": 21.79,
445
+ "learning_rate": 5.470085470085471e-06,
446
+ "loss": 1.6568,
447
+ "step": 8500
448
+ },
449
+ {
450
+ "epoch": 22.0,
451
+ "eval_f1": 0.9103,
452
+ "eval_gen_len": 26.532,
453
+ "eval_loss": 1.6104176044464111,
454
+ "eval_precision": 0.9113,
455
+ "eval_recall": 0.9096,
456
+ "eval_rouge1": 0.4733,
457
+ "eval_rouge2": 0.2221,
458
+ "eval_rougeL": 0.3885,
459
+ "eval_rougeLsum": 0.3886,
460
+ "eval_runtime": 508.5365,
461
+ "eval_samples_per_second": 5.408,
462
+ "eval_steps_per_second": 0.338,
463
+ "step": 8582
464
+ },
465
+ {
466
+ "epoch": 23.0,
467
+ "eval_f1": 0.9104,
468
+ "eval_gen_len": 26.82,
469
+ "eval_loss": 1.6056216955184937,
470
+ "eval_precision": 0.911,
471
+ "eval_recall": 0.9101,
472
+ "eval_rouge1": 0.4756,
473
+ "eval_rouge2": 0.2236,
474
+ "eval_rougeL": 0.3891,
475
+ "eval_rougeLsum": 0.3891,
476
+ "eval_runtime": 510.3093,
477
+ "eval_samples_per_second": 5.389,
478
+ "eval_steps_per_second": 0.337,
479
+ "step": 8973
480
+ },
481
+ {
482
+ "epoch": 23.07,
483
+ "learning_rate": 4.615384615384616e-06,
484
+ "loss": 1.6418,
485
+ "step": 9000
486
+ },
487
+ {
488
+ "epoch": 24.0,
489
+ "eval_f1": 0.9106,
490
+ "eval_gen_len": 26.55127272727273,
491
+ "eval_loss": 1.6021137237548828,
492
+ "eval_precision": 0.9115,
493
+ "eval_recall": 0.91,
494
+ "eval_rouge1": 0.476,
495
+ "eval_rouge2": 0.2246,
496
+ "eval_rougeL": 0.3903,
497
+ "eval_rougeLsum": 0.3903,
498
+ "eval_runtime": 506.3054,
499
+ "eval_samples_per_second": 5.432,
500
+ "eval_steps_per_second": 0.34,
501
+ "step": 9360
502
+ },
503
+ {
504
+ "epoch": 24.36,
505
+ "learning_rate": 3.760683760683761e-06,
506
+ "loss": 1.6319,
507
+ "step": 9500
508
+ },
509
+ {
510
+ "epoch": 25.0,
511
+ "eval_f1": 0.9105,
512
+ "eval_gen_len": 26.437454545454546,
513
+ "eval_loss": 1.5995395183563232,
514
+ "eval_precision": 0.9116,
515
+ "eval_recall": 0.9098,
516
+ "eval_rouge1": 0.4751,
517
+ "eval_rouge2": 0.2245,
518
+ "eval_rougeL": 0.3905,
519
+ "eval_rougeLsum": 0.3905,
520
+ "eval_runtime": 501.0862,
521
+ "eval_samples_per_second": 5.488,
522
+ "eval_steps_per_second": 0.343,
523
+ "step": 9751
524
+ },
525
+ {
526
+ "epoch": 25.64,
527
+ "learning_rate": 2.9059829059829063e-06,
528
+ "loss": 1.624,
529
+ "step": 10000
530
+ },
531
+ {
532
+ "epoch": 26.0,
533
+ "eval_f1": 0.9107,
534
+ "eval_gen_len": 26.60181818181818,
535
+ "eval_loss": 1.597448468208313,
536
+ "eval_precision": 0.9116,
537
+ "eval_recall": 0.9101,
538
+ "eval_rouge1": 0.4756,
539
+ "eval_rouge2": 0.2247,
540
+ "eval_rougeL": 0.3903,
541
+ "eval_rougeLsum": 0.3904,
542
+ "eval_runtime": 500.0077,
543
+ "eval_samples_per_second": 5.5,
544
+ "eval_steps_per_second": 0.344,
545
+ "step": 10142
546
+ },
547
+ {
548
+ "epoch": 26.92,
549
+ "learning_rate": 2.0512820512820513e-06,
550
+ "loss": 1.6184,
551
+ "step": 10500
552
+ },
553
+ {
554
+ "epoch": 27.0,
555
+ "eval_f1": 0.9106,
556
+ "eval_gen_len": 26.483272727272727,
557
+ "eval_loss": 1.5952799320220947,
558
+ "eval_precision": 0.9116,
559
+ "eval_recall": 0.9099,
560
+ "eval_rouge1": 0.4747,
561
+ "eval_rouge2": 0.2231,
562
+ "eval_rougeL": 0.3899,
563
+ "eval_rougeLsum": 0.3899,
564
+ "eval_runtime": 505.7256,
565
+ "eval_samples_per_second": 5.438,
566
+ "eval_steps_per_second": 0.34,
567
+ "step": 10533
568
+ },
569
+ {
570
+ "epoch": 28.0,
571
+ "eval_f1": 0.9107,
572
+ "eval_gen_len": 26.560363636363636,
573
+ "eval_loss": 1.594251036643982,
574
+ "eval_precision": 0.9116,
575
+ "eval_recall": 0.9102,
576
+ "eval_rouge1": 0.4758,
577
+ "eval_rouge2": 0.2243,
578
+ "eval_rougeL": 0.3907,
579
+ "eval_rougeLsum": 0.3908,
580
+ "eval_runtime": 503.0877,
581
+ "eval_samples_per_second": 5.466,
582
+ "eval_steps_per_second": 0.342,
583
+ "step": 10923
584
+ },
585
+ {
586
+ "epoch": 28.2,
587
+ "learning_rate": 1.1965811965811968e-06,
588
+ "loss": 1.6126,
589
+ "step": 11000
590
+ },
591
+ {
592
+ "epoch": 29.0,
593
+ "eval_f1": 0.9108,
594
+ "eval_gen_len": 26.577454545454547,
595
+ "eval_loss": 1.5936089754104614,
596
+ "eval_precision": 0.9117,
597
+ "eval_recall": 0.9103,
598
+ "eval_rouge1": 0.4776,
599
+ "eval_rouge2": 0.226,
600
+ "eval_rougeL": 0.3926,
601
+ "eval_rougeLsum": 0.3926,
602
+ "eval_runtime": 506.8028,
603
+ "eval_samples_per_second": 5.426,
604
+ "eval_steps_per_second": 0.339,
605
+ "step": 11314
606
+ },
607
+ {
608
+ "epoch": 29.47,
609
+ "learning_rate": 3.4188034188034194e-07,
610
+ "loss": 1.6148,
611
+ "step": 11500
612
+ },
613
+ {
614
+ "epoch": 29.99,
615
+ "eval_f1": 0.9108,
616
+ "eval_gen_len": 26.555636363636363,
617
+ "eval_loss": 1.5934168100357056,
618
+ "eval_precision": 0.9117,
619
+ "eval_recall": 0.9103,
620
+ "eval_rouge1": 0.4774,
621
+ "eval_rouge2": 0.2259,
622
+ "eval_rougeL": 0.3926,
623
+ "eval_rougeLsum": 0.3926,
624
+ "eval_runtime": 505.9746,
625
+ "eval_samples_per_second": 5.435,
626
+ "eval_steps_per_second": 0.34,
627
+ "step": 11700
628
+ },
629
+ {
630
+ "epoch": 29.99,
631
+ "step": 11700,
632
+ "total_flos": 2.1629605976148541e+18,
633
+ "train_loss": 0.37363835000584267,
634
+ "train_runtime": 14987.4083,
635
+ "train_samples_per_second": 100.084,
636
+ "train_steps_per_second": 0.781
637
  }
638
  ],
639
  "logging_steps": 500,
640
+ "max_steps": 11700,
641
  "num_input_tokens_seen": 0,
642
+ "num_train_epochs": 30,
643
  "save_steps": 500,
644
+ "total_flos": 2.1629605976148541e+18,
645
  "train_batch_size": 32,
646
  "trial_name": null,
647
  "trial_params": null