plip commited on
Commit
91ab334
1 Parent(s): 84369b0

Training in progress, step 120000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49758088a73ffa86cfa7391b8520727f0e91c812a8d23680b3ffbf53509abe86
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1e0bc201a48037c117cf182b1af5c6f4d1de81608cab594dcb7b0df89e16d70
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d6b8966d1cc83e4388837c4e32ca9b57abfb21fb0d307bbaed74f29719988a7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23090dda99868abea6a7796871fd763d22a4e00fff59d8a65d78e70f06487cae
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca5bc32c00ec3a14d611f2862ea85a2017db3bf307d1ec3917eebae34f57b515
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a36392859753540b5ff28a6690e0fb35c1157de322529d1ae210898db91ddda7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ddf5c33f60196860027b40e8443017b8ed479cc5dfb73f8a076fe07f546fd4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.803404862633162,
5
- "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2206,11 +2206,211 @@
2206
  "eval_samples_per_second": 824.682,
2207
  "eval_steps_per_second": 13.195,
2208
  "step": 110000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2209
  }
2210
  ],
2211
  "max_steps": 500000,
2212
  "num_train_epochs": 13,
2213
- "total_flos": 3.51433156869341e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.058259850145267,
5
+ "global_step": 120000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2206
  "eval_samples_per_second": 824.682,
2207
  "eval_steps_per_second": 13.195,
2208
  "step": 110000
2209
+ },
2210
+ {
2211
+ "epoch": 2.82,
2212
+ "learning_rate": 0.0002774275491977922,
2213
+ "loss": 0.3111,
2214
+ "step": 110500
2215
+ },
2216
+ {
2217
+ "epoch": 2.83,
2218
+ "learning_rate": 0.0002771699479464853,
2219
+ "loss": 0.3089,
2220
+ "step": 111000
2221
+ },
2222
+ {
2223
+ "epoch": 2.83,
2224
+ "eval_loss": 0.8333858847618103,
2225
+ "eval_runtime": 1.2664,
2226
+ "eval_samples_per_second": 789.664,
2227
+ "eval_steps_per_second": 12.635,
2228
+ "step": 111000
2229
+ },
2230
+ {
2231
+ "epoch": 2.84,
2232
+ "learning_rate": 0.00027691101066420104,
2233
+ "loss": 0.3091,
2234
+ "step": 111500
2235
+ },
2236
+ {
2237
+ "epoch": 2.85,
2238
+ "learning_rate": 0.0002766507401826361,
2239
+ "loss": 0.3089,
2240
+ "step": 112000
2241
+ },
2242
+ {
2243
+ "epoch": 2.85,
2244
+ "eval_loss": 0.8437691330909729,
2245
+ "eval_runtime": 1.2859,
2246
+ "eval_samples_per_second": 777.686,
2247
+ "eval_steps_per_second": 12.443,
2248
+ "step": 112000
2249
+ },
2250
+ {
2251
+ "epoch": 2.87,
2252
+ "learning_rate": 0.0002763891393480666,
2253
+ "loss": 0.3086,
2254
+ "step": 112500
2255
+ },
2256
+ {
2257
+ "epoch": 2.88,
2258
+ "learning_rate": 0.0002761262110213175,
2259
+ "loss": 0.3088,
2260
+ "step": 113000
2261
+ },
2262
+ {
2263
+ "epoch": 2.88,
2264
+ "eval_loss": 0.8328044414520264,
2265
+ "eval_runtime": 1.2617,
2266
+ "eval_samples_per_second": 792.564,
2267
+ "eval_steps_per_second": 12.681,
2268
+ "step": 113000
2269
+ },
2270
+ {
2271
+ "epoch": 2.89,
2272
+ "learning_rate": 0.00027586195807773083,
2273
+ "loss": 0.308,
2274
+ "step": 113500
2275
+ },
2276
+ {
2277
+ "epoch": 2.91,
2278
+ "learning_rate": 0.00027559638340713435,
2279
+ "loss": 0.3085,
2280
+ "step": 114000
2281
+ },
2282
+ {
2283
+ "epoch": 2.91,
2284
+ "eval_loss": 0.831738293170929,
2285
+ "eval_runtime": 1.2398,
2286
+ "eval_samples_per_second": 806.55,
2287
+ "eval_steps_per_second": 12.905,
2288
+ "step": 114000
2289
+ },
2290
+ {
2291
+ "epoch": 2.92,
2292
+ "learning_rate": 0.00027532948991381025,
2293
+ "loss": 0.3079,
2294
+ "step": 114500
2295
+ },
2296
+ {
2297
+ "epoch": 2.93,
2298
+ "learning_rate": 0.00027506128051646287,
2299
+ "loss": 0.3097,
2300
+ "step": 115000
2301
+ },
2302
+ {
2303
+ "epoch": 2.93,
2304
+ "eval_loss": 0.8462249636650085,
2305
+ "eval_runtime": 1.2264,
2306
+ "eval_samples_per_second": 815.428,
2307
+ "eval_steps_per_second": 13.047,
2308
+ "step": 115000
2309
+ },
2310
+ {
2311
+ "epoch": 2.94,
2312
+ "learning_rate": 0.00027479175814818733,
2313
+ "loss": 0.3082,
2314
+ "step": 115500
2315
+ },
2316
+ {
2317
+ "epoch": 2.96,
2318
+ "learning_rate": 0.000274520925756437,
2319
+ "loss": 0.3082,
2320
+ "step": 116000
2321
+ },
2322
+ {
2323
+ "epoch": 2.96,
2324
+ "eval_loss": 0.8435738682746887,
2325
+ "eval_runtime": 1.3152,
2326
+ "eval_samples_per_second": 760.322,
2327
+ "eval_steps_per_second": 12.165,
2328
+ "step": 116000
2329
+ },
2330
+ {
2331
+ "epoch": 2.97,
2332
+ "learning_rate": 0.00027424878630299157,
2333
+ "loss": 0.3074,
2334
+ "step": 116500
2335
+ },
2336
+ {
2337
+ "epoch": 2.98,
2338
+ "learning_rate": 0.0002739753427639244,
2339
+ "loss": 0.3077,
2340
+ "step": 117000
2341
+ },
2342
+ {
2343
+ "epoch": 2.98,
2344
+ "eval_loss": 0.8435785174369812,
2345
+ "eval_runtime": 1.2302,
2346
+ "eval_samples_per_second": 812.844,
2347
+ "eval_steps_per_second": 13.006,
2348
+ "step": 117000
2349
+ },
2350
+ {
2351
+ "epoch": 2.99,
2352
+ "learning_rate": 0.0002737005981295704,
2353
+ "loss": 0.3078,
2354
+ "step": 117500
2355
+ },
2356
+ {
2357
+ "epoch": 3.01,
2358
+ "learning_rate": 0.0002734245554044927,
2359
+ "loss": 0.3086,
2360
+ "step": 118000
2361
+ },
2362
+ {
2363
+ "epoch": 3.01,
2364
+ "eval_loss": 0.8482502698898315,
2365
+ "eval_runtime": 1.3338,
2366
+ "eval_samples_per_second": 749.745,
2367
+ "eval_steps_per_second": 11.996,
2368
+ "step": 118000
2369
+ },
2370
+ {
2371
+ "epoch": 3.02,
2372
+ "learning_rate": 0.0002731472176074504,
2373
+ "loss": 0.307,
2374
+ "step": 118500
2375
+ },
2376
+ {
2377
+ "epoch": 3.03,
2378
+ "learning_rate": 0.0002728685877713653,
2379
+ "loss": 0.3072,
2380
+ "step": 119000
2381
+ },
2382
+ {
2383
+ "epoch": 3.03,
2384
+ "eval_loss": 0.8354719281196594,
2385
+ "eval_runtime": 1.4002,
2386
+ "eval_samples_per_second": 714.17,
2387
+ "eval_steps_per_second": 11.427,
2388
+ "step": 119000
2389
+ },
2390
+ {
2391
+ "epoch": 3.05,
2392
+ "learning_rate": 0.0002725886689432884,
2393
+ "loss": 0.3075,
2394
+ "step": 119500
2395
+ },
2396
+ {
2397
+ "epoch": 3.06,
2398
+ "learning_rate": 0.0002723074641843674,
2399
+ "loss": 0.3066,
2400
+ "step": 120000
2401
+ },
2402
+ {
2403
+ "epoch": 3.06,
2404
+ "eval_loss": 0.8281124234199524,
2405
+ "eval_runtime": 1.3099,
2406
+ "eval_samples_per_second": 763.433,
2407
+ "eval_steps_per_second": 12.215,
2408
+ "step": 120000
2409
  }
2410
  ],
2411
  "max_steps": 500000,
2412
  "num_train_epochs": 13,
2413
+ "total_flos": 3.833805228554865e+21,
2414
  "trial_name": null,
2415
  "trial_params": null
2416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d6b8966d1cc83e4388837c4e32ca9b57abfb21fb0d307bbaed74f29719988a7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23090dda99868abea6a7796871fd763d22a4e00fff59d8a65d78e70f06487cae
3
  size 102501541