jflotz commited on
Commit
933465f
1 Parent(s): 9efe1ec

Training in progress, step 130000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5c1a1f468fc2d342a23f1ac0d5752569966a310ca13ca1f923b43583a411e84
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9dcc4ff2bc74ab69f0263634058bb8fb9cc796ba30099ac6af07a524413216
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65a92e8166f8fa9a54e98da57dbdf5c7d7bdbe9bb899a7638fe74e0582796f58
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77770474995765e28cd3f772259c3b9f70956913fcf88d26cfbea63dec9f29f8
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b70180124cd582ce1fec2362a6c7b1500fac5e37241e6e64ee705ea6e177b8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c534c69c702472a462ee5c5c8e40f09fd7295b5cb464c1c3364c271724c3d05b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa61e63d6ec853afa02e48d5167bab30a383bd9f05f192b20c686fb9a3478097
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.353319057815845,
5
- "global_step": 120000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2406,11 +2406,211 @@
2406
  "eval_samples_per_second": 1070.506,
2407
  "eval_steps_per_second": 16.778,
2408
  "step": 120000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2409
  }
2410
  ],
2411
  "max_steps": 250000,
2412
  "num_train_epochs": 12,
2413
- "total_flos": 1.9219803744787812e+21,
2414
  "trial_name": null,
2415
  "trial_params": null
2416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.799428979300499,
5
+ "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2406
  "eval_samples_per_second": 1070.506,
2407
  "eval_steps_per_second": 16.778,
2408
  "step": 120000
2409
+ },
2410
+ {
2411
+ "epoch": 5.38,
2412
+ "learning_rate": 0.0003468073028890751,
2413
+ "loss": 0.3886,
2414
+ "step": 120500
2415
+ },
2416
+ {
2417
+ "epoch": 5.4,
2418
+ "learning_rate": 0.0003448750009162916,
2419
+ "loss": 0.3887,
2420
+ "step": 121000
2421
+ },
2422
+ {
2423
+ "epoch": 5.4,
2424
+ "eval_loss": 0.3600543737411499,
2425
+ "eval_runtime": 2.1567,
2426
+ "eval_samples_per_second": 1065.071,
2427
+ "eval_steps_per_second": 16.692,
2428
+ "step": 121000
2429
+ },
2430
+ {
2431
+ "epoch": 5.42,
2432
+ "learning_rate": 0.00034294095468179094,
2433
+ "loss": 0.3881,
2434
+ "step": 121500
2435
+ },
2436
+ {
2437
+ "epoch": 5.44,
2438
+ "learning_rate": 0.00034100524878702073,
2439
+ "loss": 0.3883,
2440
+ "step": 122000
2441
+ },
2442
+ {
2443
+ "epoch": 5.44,
2444
+ "eval_loss": 0.3610630929470062,
2445
+ "eval_runtime": 2.1629,
2446
+ "eval_samples_per_second": 1061.977,
2447
+ "eval_steps_per_second": 16.644,
2448
+ "step": 122000
2449
+ },
2450
+ {
2451
+ "epoch": 5.46,
2452
+ "learning_rate": 0.000339067967906028,
2453
+ "loss": 0.4695,
2454
+ "step": 122500
2455
+ },
2456
+ {
2457
+ "epoch": 5.49,
2458
+ "learning_rate": 0.0003371291967817539,
2459
+ "loss": 0.6776,
2460
+ "step": 123000
2461
+ },
2462
+ {
2463
+ "epoch": 5.49,
2464
+ "eval_loss": 0.6768860220909119,
2465
+ "eval_runtime": 2.1959,
2466
+ "eval_samples_per_second": 1046.056,
2467
+ "eval_steps_per_second": 16.394,
2468
+ "step": 123000
2469
+ },
2470
+ {
2471
+ "epoch": 5.51,
2472
+ "learning_rate": 0.0003351890202223285,
2473
+ "loss": 0.6171,
2474
+ "step": 123500
2475
+ },
2476
+ {
2477
+ "epoch": 5.53,
2478
+ "learning_rate": 0.0003332475230973597,
2479
+ "loss": 0.3917,
2480
+ "step": 124000
2481
+ },
2482
+ {
2483
+ "epoch": 5.53,
2484
+ "eval_loss": 0.36259856820106506,
2485
+ "eval_runtime": 2.1732,
2486
+ "eval_samples_per_second": 1056.948,
2487
+ "eval_steps_per_second": 16.565,
2488
+ "step": 124000
2489
+ },
2490
+ {
2491
+ "epoch": 5.55,
2492
+ "learning_rate": 0.00033130479033422134,
2493
+ "loss": 0.3889,
2494
+ "step": 124500
2495
+ },
2496
+ {
2497
+ "epoch": 5.58,
2498
+ "learning_rate": 0.0003293609069143381,
2499
+ "loss": 0.3897,
2500
+ "step": 125000
2501
+ },
2502
+ {
2503
+ "epoch": 5.58,
2504
+ "eval_loss": 0.3617185652256012,
2505
+ "eval_runtime": 2.1669,
2506
+ "eval_samples_per_second": 1060.024,
2507
+ "eval_steps_per_second": 16.613,
2508
+ "step": 125000
2509
+ },
2510
+ {
2511
+ "epoch": 5.6,
2512
+ "learning_rate": 0.00032741595786946783,
2513
+ "loss": 0.3879,
2514
+ "step": 125500
2515
+ },
2516
+ {
2517
+ "epoch": 5.62,
2518
+ "learning_rate": 0.000325470028277983,
2519
+ "loss": 0.3869,
2520
+ "step": 126000
2521
+ },
2522
+ {
2523
+ "epoch": 5.62,
2524
+ "eval_loss": 0.35780513286590576,
2525
+ "eval_runtime": 2.1966,
2526
+ "eval_samples_per_second": 1045.71,
2527
+ "eval_steps_per_second": 16.389,
2528
+ "step": 126000
2529
+ },
2530
+ {
2531
+ "epoch": 5.64,
2532
+ "learning_rate": 0.00032352320326114754,
2533
+ "loss": 0.3868,
2534
+ "step": 126500
2535
+ },
2536
+ {
2537
+ "epoch": 5.67,
2538
+ "learning_rate": 0.00032157556797939436,
2539
+ "loss": 0.3864,
2540
+ "step": 127000
2541
+ },
2542
+ {
2543
+ "epoch": 5.67,
2544
+ "eval_loss": 0.35783183574676514,
2545
+ "eval_runtime": 2.2195,
2546
+ "eval_samples_per_second": 1034.94,
2547
+ "eval_steps_per_second": 16.22,
2548
+ "step": 127000
2549
+ },
2550
+ {
2551
+ "epoch": 5.69,
2552
+ "learning_rate": 0.00031962720762860057,
2553
+ "loss": 0.3865,
2554
+ "step": 127500
2555
+ },
2556
+ {
2557
+ "epoch": 5.71,
2558
+ "learning_rate": 0.0003176782074363595,
2559
+ "loss": 0.3862,
2560
+ "step": 128000
2561
+ },
2562
+ {
2563
+ "epoch": 5.71,
2564
+ "eval_loss": 0.35733312368392944,
2565
+ "eval_runtime": 2.1604,
2566
+ "eval_samples_per_second": 1063.224,
2567
+ "eval_steps_per_second": 16.664,
2568
+ "step": 128000
2569
+ },
2570
+ {
2571
+ "epoch": 5.73,
2572
+ "learning_rate": 0.0003157286526582535,
2573
+ "loss": 0.3858,
2574
+ "step": 128500
2575
+ },
2576
+ {
2577
+ "epoch": 5.75,
2578
+ "learning_rate": 0.0003137786285741241,
2579
+ "loss": 0.3855,
2580
+ "step": 129000
2581
+ },
2582
+ {
2583
+ "epoch": 5.75,
2584
+ "eval_loss": 0.3577572703361511,
2585
+ "eval_runtime": 2.1724,
2586
+ "eval_samples_per_second": 1057.343,
2587
+ "eval_steps_per_second": 16.571,
2588
+ "step": 129000
2589
+ },
2590
+ {
2591
+ "epoch": 5.78,
2592
+ "learning_rate": 0.0003118282204843421,
2593
+ "loss": 0.3854,
2594
+ "step": 129500
2595
+ },
2596
+ {
2597
+ "epoch": 5.8,
2598
+ "learning_rate": 0.0003098775137060758,
2599
+ "loss": 0.3854,
2600
+ "step": 130000
2601
+ },
2602
+ {
2603
+ "epoch": 5.8,
2604
+ "eval_loss": 0.357120543718338,
2605
+ "eval_runtime": 2.158,
2606
+ "eval_samples_per_second": 1064.4,
2607
+ "eval_steps_per_second": 16.682,
2608
+ "step": 130000
2609
  }
2610
  ],
2611
  "max_steps": 250000,
2612
  "num_train_epochs": 12,
2613
+ "total_flos": 2.0821483773445852e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65a92e8166f8fa9a54e98da57dbdf5c7d7bdbe9bb899a7638fe74e0582796f58
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77770474995765e28cd3f772259c3b9f70956913fcf88d26cfbea63dec9f29f8
3
  size 25761253