humbertonc commited on
Commit
a479a21
1 Parent(s): 4979389

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b39b20829bb91d3b3d191b8a08ffadc90a3d3bdebc708bb24bc2638975fc44d
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1048b2a328a08ec24a848221a1788ec650be93f63d5b736198828a464725a0fc
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:991be9df2cbd17b97e6c203028b7b1c8f1dd9349cb10729a00d310248bd4689b
3
  size 42545748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76a1752ade8f1c4357a65263add01bab40bcce13a310f606f37f5f889c14a4be
3
  size 42545748
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d04d81b6f29c6bcf392c89d3a394ddfc4da22eb6e72895d54ef58cddcf6e205
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fa36bed58f6761e1972921e30943b30143ac363ab2649891a7e0c3d7c4a104d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65ae43dd97ccea29d635c4ebaabb6ed265ebe5b4f2988f15738ce29df7227976
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e154dc8343bffdd08d271f4636f8ed6babc23043b834ad7a54a9ce06eab8ee7c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006108735491753207,
5
  "eval_steps": 1000,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,13 +307,313 @@
307
  "learning_rate": 0.00011578947368421053,
308
  "loss": 0.6551,
309
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 1,
313
  "max_steps": 100,
314
  "num_train_epochs": 1,
315
  "save_steps": 50,
316
- "total_flos": 2959427434954752.0,
317
  "trial_name": null,
318
  "trial_params": null
319
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.012217470983506415,
5
  "eval_steps": 1000,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "learning_rate": 0.00011578947368421053,
308
  "loss": 0.6551,
309
  "step": 50
310
+ },
311
+ {
312
+ "epoch": 0.01,
313
+ "learning_rate": 0.0001136842105263158,
314
+ "loss": 0.937,
315
+ "step": 51
316
+ },
317
+ {
318
+ "epoch": 0.01,
319
+ "learning_rate": 0.00011157894736842105,
320
+ "loss": 0.5675,
321
+ "step": 52
322
+ },
323
+ {
324
+ "epoch": 0.01,
325
+ "learning_rate": 0.00010947368421052633,
326
+ "loss": 0.9796,
327
+ "step": 53
328
+ },
329
+ {
330
+ "epoch": 0.01,
331
+ "learning_rate": 0.00010736842105263158,
332
+ "loss": 0.7879,
333
+ "step": 54
334
+ },
335
+ {
336
+ "epoch": 0.01,
337
+ "learning_rate": 0.00010526315789473685,
338
+ "loss": 0.4604,
339
+ "step": 55
340
+ },
341
+ {
342
+ "epoch": 0.01,
343
+ "learning_rate": 0.00010315789473684211,
344
+ "loss": 1.0179,
345
+ "step": 56
346
+ },
347
+ {
348
+ "epoch": 0.01,
349
+ "learning_rate": 0.00010105263157894738,
350
+ "loss": 0.7213,
351
+ "step": 57
352
+ },
353
+ {
354
+ "epoch": 0.01,
355
+ "learning_rate": 9.894736842105263e-05,
356
+ "loss": 0.6945,
357
+ "step": 58
358
+ },
359
+ {
360
+ "epoch": 0.01,
361
+ "learning_rate": 9.68421052631579e-05,
362
+ "loss": 0.7157,
363
+ "step": 59
364
+ },
365
+ {
366
+ "epoch": 0.01,
367
+ "learning_rate": 9.473684210526316e-05,
368
+ "loss": 0.6104,
369
+ "step": 60
370
+ },
371
+ {
372
+ "epoch": 0.01,
373
+ "learning_rate": 9.263157894736843e-05,
374
+ "loss": 0.7312,
375
+ "step": 61
376
+ },
377
+ {
378
+ "epoch": 0.01,
379
+ "learning_rate": 9.052631578947369e-05,
380
+ "loss": 0.6224,
381
+ "step": 62
382
+ },
383
+ {
384
+ "epoch": 0.01,
385
+ "learning_rate": 8.842105263157894e-05,
386
+ "loss": 0.5421,
387
+ "step": 63
388
+ },
389
+ {
390
+ "epoch": 0.01,
391
+ "learning_rate": 8.631578947368421e-05,
392
+ "loss": 0.6406,
393
+ "step": 64
394
+ },
395
+ {
396
+ "epoch": 0.01,
397
+ "learning_rate": 8.421052631578948e-05,
398
+ "loss": 0.5584,
399
+ "step": 65
400
+ },
401
+ {
402
+ "epoch": 0.01,
403
+ "learning_rate": 8.210526315789474e-05,
404
+ "loss": 0.5135,
405
+ "step": 66
406
+ },
407
+ {
408
+ "epoch": 0.01,
409
+ "learning_rate": 8e-05,
410
+ "loss": 0.5533,
411
+ "step": 67
412
+ },
413
+ {
414
+ "epoch": 0.01,
415
+ "learning_rate": 7.789473684210526e-05,
416
+ "loss": 0.5121,
417
+ "step": 68
418
+ },
419
+ {
420
+ "epoch": 0.01,
421
+ "learning_rate": 7.578947368421054e-05,
422
+ "loss": 0.6168,
423
+ "step": 69
424
+ },
425
+ {
426
+ "epoch": 0.01,
427
+ "learning_rate": 7.368421052631579e-05,
428
+ "loss": 0.8289,
429
+ "step": 70
430
+ },
431
+ {
432
+ "epoch": 0.01,
433
+ "learning_rate": 7.157894736842105e-05,
434
+ "loss": 0.578,
435
+ "step": 71
436
+ },
437
+ {
438
+ "epoch": 0.01,
439
+ "learning_rate": 6.947368421052632e-05,
440
+ "loss": 0.834,
441
+ "step": 72
442
+ },
443
+ {
444
+ "epoch": 0.01,
445
+ "learning_rate": 6.736842105263159e-05,
446
+ "loss": 0.5183,
447
+ "step": 73
448
+ },
449
+ {
450
+ "epoch": 0.01,
451
+ "learning_rate": 6.526315789473685e-05,
452
+ "loss": 0.5795,
453
+ "step": 74
454
+ },
455
+ {
456
+ "epoch": 0.01,
457
+ "learning_rate": 6.31578947368421e-05,
458
+ "loss": 0.6634,
459
+ "step": 75
460
+ },
461
+ {
462
+ "epoch": 0.01,
463
+ "learning_rate": 6.105263157894737e-05,
464
+ "loss": 0.5462,
465
+ "step": 76
466
+ },
467
+ {
468
+ "epoch": 0.01,
469
+ "learning_rate": 5.894736842105263e-05,
470
+ "loss": 0.6116,
471
+ "step": 77
472
+ },
473
+ {
474
+ "epoch": 0.01,
475
+ "learning_rate": 5.68421052631579e-05,
476
+ "loss": 1.0132,
477
+ "step": 78
478
+ },
479
+ {
480
+ "epoch": 0.01,
481
+ "learning_rate": 5.4736842105263165e-05,
482
+ "loss": 0.4689,
483
+ "step": 79
484
+ },
485
+ {
486
+ "epoch": 0.01,
487
+ "learning_rate": 5.2631578947368424e-05,
488
+ "loss": 0.6789,
489
+ "step": 80
490
+ },
491
+ {
492
+ "epoch": 0.01,
493
+ "learning_rate": 5.052631578947369e-05,
494
+ "loss": 0.8955,
495
+ "step": 81
496
+ },
497
+ {
498
+ "epoch": 0.01,
499
+ "learning_rate": 4.842105263157895e-05,
500
+ "loss": 0.4773,
501
+ "step": 82
502
+ },
503
+ {
504
+ "epoch": 0.01,
505
+ "learning_rate": 4.6315789473684214e-05,
506
+ "loss": 0.6307,
507
+ "step": 83
508
+ },
509
+ {
510
+ "epoch": 0.01,
511
+ "learning_rate": 4.421052631578947e-05,
512
+ "loss": 0.5953,
513
+ "step": 84
514
+ },
515
+ {
516
+ "epoch": 0.01,
517
+ "learning_rate": 4.210526315789474e-05,
518
+ "loss": 0.5905,
519
+ "step": 85
520
+ },
521
+ {
522
+ "epoch": 0.01,
523
+ "learning_rate": 4e-05,
524
+ "loss": 0.8394,
525
+ "step": 86
526
+ },
527
+ {
528
+ "epoch": 0.01,
529
+ "learning_rate": 3.789473684210527e-05,
530
+ "loss": 0.5808,
531
+ "step": 87
532
+ },
533
+ {
534
+ "epoch": 0.01,
535
+ "learning_rate": 3.578947368421053e-05,
536
+ "loss": 0.6196,
537
+ "step": 88
538
+ },
539
+ {
540
+ "epoch": 0.01,
541
+ "learning_rate": 3.368421052631579e-05,
542
+ "loss": 0.81,
543
+ "step": 89
544
+ },
545
+ {
546
+ "epoch": 0.01,
547
+ "learning_rate": 3.157894736842105e-05,
548
+ "loss": 0.7769,
549
+ "step": 90
550
+ },
551
+ {
552
+ "epoch": 0.01,
553
+ "learning_rate": 2.9473684210526314e-05,
554
+ "loss": 0.6618,
555
+ "step": 91
556
+ },
557
+ {
558
+ "epoch": 0.01,
559
+ "learning_rate": 2.7368421052631583e-05,
560
+ "loss": 0.4963,
561
+ "step": 92
562
+ },
563
+ {
564
+ "epoch": 0.01,
565
+ "learning_rate": 2.5263157894736845e-05,
566
+ "loss": 0.6149,
567
+ "step": 93
568
+ },
569
+ {
570
+ "epoch": 0.01,
571
+ "learning_rate": 2.3157894736842107e-05,
572
+ "loss": 0.7242,
573
+ "step": 94
574
+ },
575
+ {
576
+ "epoch": 0.01,
577
+ "learning_rate": 2.105263157894737e-05,
578
+ "loss": 0.9024,
579
+ "step": 95
580
+ },
581
+ {
582
+ "epoch": 0.01,
583
+ "learning_rate": 1.8947368421052634e-05,
584
+ "loss": 0.8663,
585
+ "step": 96
586
+ },
587
+ {
588
+ "epoch": 0.01,
589
+ "learning_rate": 1.6842105263157896e-05,
590
+ "loss": 0.4789,
591
+ "step": 97
592
+ },
593
+ {
594
+ "epoch": 0.01,
595
+ "learning_rate": 1.4736842105263157e-05,
596
+ "loss": 0.7588,
597
+ "step": 98
598
+ },
599
+ {
600
+ "epoch": 0.01,
601
+ "learning_rate": 1.2631578947368422e-05,
602
+ "loss": 1.0309,
603
+ "step": 99
604
+ },
605
+ {
606
+ "epoch": 0.01,
607
+ "learning_rate": 1.0526315789473684e-05,
608
+ "loss": 0.7587,
609
+ "step": 100
610
  }
611
  ],
612
  "logging_steps": 1,
613
  "max_steps": 100,
614
  "num_train_epochs": 1,
615
  "save_steps": 50,
616
+ "total_flos": 5806574461747200.0,
617
  "trial_name": null,
618
  "trial_params": null
619
  }