jflotz commited on
Commit
772cf88
1 Parent(s): fc7d79e

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11cf0c7d6b061f7dad52edce9e5088c7de01b0d50fde085c881cac1ce38869f0
3
- size 50044241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c390c19518ce2505f6a86ec1e2f47d0d41f2396b4d8d3b3070b634f49cd1065d
3
+ size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32de19aae4938ccc6ceff2fe466b7bb7987521081cfbb1f7e1384e10682c2045
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fec60f3309de39871877c86c47238ea77b026c2a586d001d07ccd4e052fc5ce
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe81dd181a7243c753033870695d21544789f2b7d0e68df1f7bb5c7aa16b0ba0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a14cae2e3d67695f0ffa4604dbaaa3f7147d70607921bb89c9afa09d57c87a5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d89de13f8c844c8306762a31aff6dac5ffaadd95c6501762d83dc7939ed9eace
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.6766595289079227,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1206,11 +1206,211 @@
1206
  "eval_samples_per_second": 1024.23,
1207
  "eval_steps_per_second": 16.052,
1208
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
  }
1210
  ],
1211
  "max_steps": 250000,
1212
  "num_train_epochs": 12,
1213
- "total_flos": 9.609937532304774e+20,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.122769450392577,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1206
  "eval_samples_per_second": 1024.23,
1207
  "eval_steps_per_second": 16.052,
1208
  "step": 60000
1209
+ },
1210
+ {
1211
+ "epoch": 2.7,
1212
+ "learning_rate": 0.0005425079773152721,
1213
+ "loss": 0.5102,
1214
+ "step": 60500
1215
+ },
1216
+ {
1217
+ "epoch": 2.72,
1218
+ "learning_rate": 0.0005413455519215879,
1219
+ "loss": 0.5061,
1220
+ "step": 61000
1221
+ },
1222
+ {
1223
+ "epoch": 2.72,
1224
+ "eval_loss": 0.4848201274871826,
1225
+ "eval_runtime": 2.1727,
1226
+ "eval_samples_per_second": 1057.234,
1227
+ "eval_steps_per_second": 16.57,
1228
+ "step": 61000
1229
+ },
1230
+ {
1231
+ "epoch": 2.74,
1232
+ "learning_rate": 0.0005401727880078093,
1233
+ "loss": 0.502,
1234
+ "step": 61500
1235
+ },
1236
+ {
1237
+ "epoch": 2.77,
1238
+ "learning_rate": 0.0005389897368744289,
1239
+ "loss": 0.4981,
1240
+ "step": 62000
1241
+ },
1242
+ {
1243
+ "epoch": 2.77,
1244
+ "eval_loss": 0.4745917320251465,
1245
+ "eval_runtime": 2.1553,
1246
+ "eval_samples_per_second": 1065.724,
1247
+ "eval_steps_per_second": 16.703,
1248
+ "step": 62000
1249
+ },
1250
+ {
1251
+ "epoch": 2.79,
1252
+ "learning_rate": 0.0005377964502719361,
1253
+ "loss": 0.4972,
1254
+ "step": 62500
1255
+ },
1256
+ {
1257
+ "epoch": 2.81,
1258
+ "learning_rate": 0.0005365929803985524,
1259
+ "loss": 0.4912,
1260
+ "step": 63000
1261
+ },
1262
+ {
1263
+ "epoch": 2.81,
1264
+ "eval_loss": 0.46808210015296936,
1265
+ "eval_runtime": 2.2693,
1266
+ "eval_samples_per_second": 1012.199,
1267
+ "eval_steps_per_second": 15.864,
1268
+ "step": 63000
1269
+ },
1270
+ {
1271
+ "epoch": 2.83,
1272
+ "learning_rate": 0.0005353793798979489,
1273
+ "loss": 0.4882,
1274
+ "step": 63500
1275
+ },
1276
+ {
1277
+ "epoch": 2.86,
1278
+ "learning_rate": 0.000534155701856943,
1279
+ "loss": 0.4847,
1280
+ "step": 64000
1281
+ },
1282
+ {
1283
+ "epoch": 2.86,
1284
+ "eval_loss": 0.45994389057159424,
1285
+ "eval_runtime": 2.2357,
1286
+ "eval_samples_per_second": 1027.41,
1287
+ "eval_steps_per_second": 16.102,
1288
+ "step": 64000
1289
+ },
1290
+ {
1291
+ "epoch": 2.88,
1292
+ "learning_rate": 0.0005329219998031763,
1293
+ "loss": 0.4818,
1294
+ "step": 64500
1295
+ },
1296
+ {
1297
+ "epoch": 2.9,
1298
+ "learning_rate": 0.0005316783277027734,
1299
+ "loss": 0.4792,
1300
+ "step": 65000
1301
+ },
1302
+ {
1303
+ "epoch": 2.9,
1304
+ "eval_loss": 0.4537006914615631,
1305
+ "eval_runtime": 2.1952,
1306
+ "eval_samples_per_second": 1046.372,
1307
+ "eval_steps_per_second": 16.399,
1308
+ "step": 65000
1309
+ },
1310
+ {
1311
+ "epoch": 2.92,
1312
+ "learning_rate": 0.0005304247399579808,
1313
+ "loss": 0.4766,
1314
+ "step": 65500
1315
+ },
1316
+ {
1317
+ "epoch": 2.94,
1318
+ "learning_rate": 0.0005291612914047876,
1319
+ "loss": 0.474,
1320
+ "step": 66000
1321
+ },
1322
+ {
1323
+ "epoch": 2.94,
1324
+ "eval_loss": 0.44910311698913574,
1325
+ "eval_runtime": 2.304,
1326
+ "eval_samples_per_second": 996.96,
1327
+ "eval_steps_per_second": 15.625,
1328
+ "step": 66000
1329
+ },
1330
+ {
1331
+ "epoch": 2.97,
1332
+ "learning_rate": 0.0005278880373105263,
1333
+ "loss": 0.4713,
1334
+ "step": 66500
1335
+ },
1336
+ {
1337
+ "epoch": 2.99,
1338
+ "learning_rate": 0.0005266050333714561,
1339
+ "loss": 0.4688,
1340
+ "step": 67000
1341
+ },
1342
+ {
1343
+ "epoch": 2.99,
1344
+ "eval_loss": 0.4437292516231537,
1345
+ "eval_runtime": 2.2404,
1346
+ "eval_samples_per_second": 1025.255,
1347
+ "eval_steps_per_second": 16.068,
1348
+ "step": 67000
1349
+ },
1350
+ {
1351
+ "epoch": 3.01,
1352
+ "learning_rate": 0.0005253123357103253,
1353
+ "loss": 0.4665,
1354
+ "step": 67500
1355
+ },
1356
+ {
1357
+ "epoch": 3.03,
1358
+ "learning_rate": 0.0005240100008739177,
1359
+ "loss": 0.464,
1360
+ "step": 68000
1361
+ },
1362
+ {
1363
+ "epoch": 3.03,
1364
+ "eval_loss": 0.4391787052154541,
1365
+ "eval_runtime": 2.1873,
1366
+ "eval_samples_per_second": 1050.17,
1367
+ "eval_steps_per_second": 16.459,
1368
+ "step": 68000
1369
+ },
1370
+ {
1371
+ "epoch": 3.06,
1372
+ "learning_rate": 0.0005226980858305778,
1373
+ "loss": 0.4616,
1374
+ "step": 68500
1375
+ },
1376
+ {
1377
+ "epoch": 3.08,
1378
+ "learning_rate": 0.0005213766479677197,
1379
+ "loss": 0.4592,
1380
+ "step": 69000
1381
+ },
1382
+ {
1383
+ "epoch": 3.08,
1384
+ "eval_loss": 0.43239352107048035,
1385
+ "eval_runtime": 2.1819,
1386
+ "eval_samples_per_second": 1052.771,
1387
+ "eval_steps_per_second": 16.5,
1388
+ "step": 69000
1389
+ },
1390
+ {
1391
+ "epoch": 3.1,
1392
+ "learning_rate": 0.0005200457450893163,
1393
+ "loss": 0.457,
1394
+ "step": 69500
1395
+ },
1396
+ {
1397
+ "epoch": 3.12,
1398
+ "learning_rate": 0.0005187054354133712,
1399
+ "loss": 0.4547,
1400
+ "step": 70000
1401
+ },
1402
+ {
1403
+ "epoch": 3.12,
1404
+ "eval_loss": 0.4284396469593048,
1405
+ "eval_runtime": 2.1519,
1406
+ "eval_samples_per_second": 1067.405,
1407
+ "eval_steps_per_second": 16.729,
1408
+ "step": 70000
1409
  }
1410
  ],
1411
  "max_steps": 250000,
1412
  "num_train_epochs": 12,
1413
+ "total_flos": 1.1211546241141079e+21,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32de19aae4938ccc6ceff2fe466b7bb7987521081cfbb1f7e1384e10682c2045
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fec60f3309de39871877c86c47238ea77b026c2a586d001d07ccd4e052fc5ce
3
  size 25761253