Training in progress, step 80000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b383fa90f7293897c37b40f067f202019bba2c3453db8d1f328f6d7383cad678
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:485ed4a755afcfc6deaa5749cdb0f525c637fe083bc46abe1fbf43c5b6a8379e
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf626f1c0627eaad3670cfc3c996a53c82f558d9ed75a3b6ecc98e209dc748f
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff31836ff2c96f7fb19d95df664b507273477e3a4f87dcce611b28b7e31820b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1406,11 +1406,211 @@
|
|
1406 |
"eval_samples_per_second": 804.488,
|
1407 |
"eval_steps_per_second": 12.872,
|
1408 |
"step": 70000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1409 |
}
|
1410 |
],
|
1411 |
"max_steps": 500000,
|
1412 |
"num_train_epochs": 13,
|
1413 |
-
"total_flos": 2.
|
1414 |
"trial_name": null,
|
1415 |
"trial_params": null
|
1416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.038839900096845,
|
5 |
+
"global_step": 80000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1406 |
"eval_samples_per_second": 804.488,
|
1407 |
"eval_steps_per_second": 12.872,
|
1408 |
"step": 70000
|
1409 |
+
},
|
1410 |
+
{
|
1411 |
+
"epoch": 1.8,
|
1412 |
+
"learning_rate": 0.0002934838153624519,
|
1413 |
+
"loss": 0.3243,
|
1414 |
+
"step": 70500
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"epoch": 1.81,
|
1418 |
+
"learning_rate": 0.00029334092796560427,
|
1419 |
+
"loss": 0.3238,
|
1420 |
+
"step": 71000
|
1421 |
+
},
|
1422 |
+
{
|
1423 |
+
"epoch": 1.81,
|
1424 |
+
"eval_loss": 0.8432244658470154,
|
1425 |
+
"eval_runtime": 1.2842,
|
1426 |
+
"eval_samples_per_second": 778.669,
|
1427 |
+
"eval_steps_per_second": 12.459,
|
1428 |
+
"step": 71000
|
1429 |
+
},
|
1430 |
+
{
|
1431 |
+
"epoch": 1.82,
|
1432 |
+
"learning_rate": 0.0002931965276945326,
|
1433 |
+
"loss": 0.3235,
|
1434 |
+
"step": 71500
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 1.83,
|
1438 |
+
"learning_rate": 0.0002930506161283751,
|
1439 |
+
"loss": 0.3235,
|
1440 |
+
"step": 72000
|
1441 |
+
},
|
1442 |
+
{
|
1443 |
+
"epoch": 1.83,
|
1444 |
+
"eval_loss": 0.8572074174880981,
|
1445 |
+
"eval_runtime": 1.2536,
|
1446 |
+
"eval_samples_per_second": 797.713,
|
1447 |
+
"eval_steps_per_second": 12.763,
|
1448 |
+
"step": 72000
|
1449 |
+
},
|
1450 |
+
{
|
1451 |
+
"epoch": 1.85,
|
1452 |
+
"learning_rate": 0.00029290319486279724,
|
1453 |
+
"loss": 0.3229,
|
1454 |
+
"step": 72500
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"epoch": 1.86,
|
1458 |
+
"learning_rate": 0.0002927542655099744,
|
1459 |
+
"loss": 0.3227,
|
1460 |
+
"step": 73000
|
1461 |
+
},
|
1462 |
+
{
|
1463 |
+
"epoch": 1.86,
|
1464 |
+
"eval_loss": 0.8465535044670105,
|
1465 |
+
"eval_runtime": 1.184,
|
1466 |
+
"eval_samples_per_second": 844.583,
|
1467 |
+
"eval_steps_per_second": 13.513,
|
1468 |
+
"step": 73000
|
1469 |
+
},
|
1470 |
+
{
|
1471 |
+
"epoch": 1.87,
|
1472 |
+
"learning_rate": 0.00029260382969857417,
|
1473 |
+
"loss": 0.3223,
|
1474 |
+
"step": 73500
|
1475 |
+
},
|
1476 |
+
{
|
1477 |
+
"epoch": 1.89,
|
1478 |
+
"learning_rate": 0.00029245188907373845,
|
1479 |
+
"loss": 0.3224,
|
1480 |
+
"step": 74000
|
1481 |
+
},
|
1482 |
+
{
|
1483 |
+
"epoch": 1.89,
|
1484 |
+
"eval_loss": 0.8523721694946289,
|
1485 |
+
"eval_runtime": 1.247,
|
1486 |
+
"eval_samples_per_second": 801.902,
|
1487 |
+
"eval_steps_per_second": 12.83,
|
1488 |
+
"step": 74000
|
1489 |
+
},
|
1490 |
+
{
|
1491 |
+
"epoch": 1.9,
|
1492 |
+
"learning_rate": 0.0002922984452970655,
|
1493 |
+
"loss": 0.3219,
|
1494 |
+
"step": 74500
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 1.91,
|
1498 |
+
"learning_rate": 0.000292143500046592,
|
1499 |
+
"loss": 0.3217,
|
1500 |
+
"step": 75000
|
1501 |
+
},
|
1502 |
+
{
|
1503 |
+
"epoch": 1.91,
|
1504 |
+
"eval_loss": 0.8451367616653442,
|
1505 |
+
"eval_runtime": 1.266,
|
1506 |
+
"eval_samples_per_second": 789.893,
|
1507 |
+
"eval_steps_per_second": 12.638,
|
1508 |
+
"step": 75000
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 1.92,
|
1512 |
+
"learning_rate": 0.0002919870550167743,
|
1513 |
+
"loss": 0.3216,
|
1514 |
+
"step": 75500
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 1.94,
|
1518 |
+
"learning_rate": 0.0002918291119184702,
|
1519 |
+
"loss": 0.321,
|
1520 |
+
"step": 76000
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 1.94,
|
1524 |
+
"eval_loss": 0.8452543020248413,
|
1525 |
+
"eval_runtime": 1.2406,
|
1526 |
+
"eval_samples_per_second": 806.062,
|
1527 |
+
"eval_steps_per_second": 12.897,
|
1528 |
+
"step": 76000
|
1529 |
+
},
|
1530 |
+
{
|
1531 |
+
"epoch": 1.95,
|
1532 |
+
"learning_rate": 0.0002916696724789201,
|
1533 |
+
"loss": 0.321,
|
1534 |
+
"step": 76500
|
1535 |
+
},
|
1536 |
+
{
|
1537 |
+
"epoch": 1.96,
|
1538 |
+
"learning_rate": 0.00029150873844172823,
|
1539 |
+
"loss": 0.3207,
|
1540 |
+
"step": 77000
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"epoch": 1.96,
|
1544 |
+
"eval_loss": 0.8388876914978027,
|
1545 |
+
"eval_runtime": 1.2429,
|
1546 |
+
"eval_samples_per_second": 804.592,
|
1547 |
+
"eval_steps_per_second": 12.873,
|
1548 |
+
"step": 77000
|
1549 |
+
},
|
1550 |
+
{
|
1551 |
+
"epoch": 1.98,
|
1552 |
+
"learning_rate": 0.00029134631156684334,
|
1553 |
+
"loss": 0.3204,
|
1554 |
+
"step": 77500
|
1555 |
+
},
|
1556 |
+
{
|
1557 |
+
"epoch": 1.99,
|
1558 |
+
"learning_rate": 0.0002911823936305398,
|
1559 |
+
"loss": 0.3202,
|
1560 |
+
"step": 78000
|
1561 |
+
},
|
1562 |
+
{
|
1563 |
+
"epoch": 1.99,
|
1564 |
+
"eval_loss": 0.8390601873397827,
|
1565 |
+
"eval_runtime": 1.2051,
|
1566 |
+
"eval_samples_per_second": 829.802,
|
1567 |
+
"eval_steps_per_second": 13.277,
|
1568 |
+
"step": 78000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 2.0,
|
1572 |
+
"learning_rate": 0.0002910169864253979,
|
1573 |
+
"loss": 0.3198,
|
1574 |
+
"step": 78500
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 2.01,
|
1578 |
+
"learning_rate": 0.0002908500917602842,
|
1579 |
+
"loss": 0.3195,
|
1580 |
+
"step": 79000
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 2.01,
|
1584 |
+
"eval_loss": 0.853469729423523,
|
1585 |
+
"eval_runtime": 1.2478,
|
1586 |
+
"eval_samples_per_second": 801.439,
|
1587 |
+
"eval_steps_per_second": 12.823,
|
1588 |
+
"step": 79000
|
1589 |
+
},
|
1590 |
+
{
|
1591 |
+
"epoch": 2.03,
|
1592 |
+
"learning_rate": 0.00029068171146033226,
|
1593 |
+
"loss": 0.3196,
|
1594 |
+
"step": 79500
|
1595 |
+
},
|
1596 |
+
{
|
1597 |
+
"epoch": 2.04,
|
1598 |
+
"learning_rate": 0.0002905118473669218,
|
1599 |
+
"loss": 0.3194,
|
1600 |
+
"step": 80000
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"epoch": 2.04,
|
1604 |
+
"eval_loss": 0.8577731847763062,
|
1605 |
+
"eval_runtime": 1.1943,
|
1606 |
+
"eval_samples_per_second": 837.345,
|
1607 |
+
"eval_steps_per_second": 13.398,
|
1608 |
+
"step": 80000
|
1609 |
}
|
1610 |
],
|
1611 |
"max_steps": 500000,
|
1612 |
"num_train_epochs": 13,
|
1613 |
+
"total_flos": 2.55587015236991e+21,
|
1614 |
"trial_name": null,
|
1615 |
"trial_params": null
|
1616 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:485ed4a755afcfc6deaa5749cdb0f525c637fe083bc46abe1fbf43c5b6a8379e
|
3 |
size 102501541
|