MarkelFe commited on
Commit
b16e795
1 Parent(s): 86b6e8d

Training in progress, step 140000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07da6cc3c2d68014088f1ee4c711e070435309050076d3a9ba72efaf28904ab8
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc3812042d03721dd2e98c646d7fbd591edd7e67355f2c23bb6e310d353ffb03
3
  size 995605445
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f4b57e2e58483c18b5fe4ec5dd79a83877c71aff1fbb8a57d6941745f468c5e
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff3b25c771db1e8a7cb96a6b9fafacb9b63e8e354d25fc6a16953bbd07d49a3d
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1f521d7e7b3b42bd4187999b7a8ee4e48ff2037dbac9d1aa8bcc52060f7db7c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde063ad0d958c0faa5d30fe45bd49a698856b439891ac3f26a2ccce7067870e
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf3296949d51d37aba3f66b6faa847888a811e29459c0dd69332f32f91246e0e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b6acd039c0b1a39efd4c1d0dea49b043256ea2b380aaca2afa9c769cbd843b
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0779429870252866,
5
- "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1676,11 +1676,139 @@
1676
  "eval_samples_per_second": 165.289,
1677
  "eval_steps_per_second": 20.666,
1678
  "step": 130000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1679
  }
1680
  ],
1681
  "max_steps": 633540,
1682
  "num_train_epochs": 15,
1683
- "total_flos": 2.7542948235264e+16,
1684
  "trial_name": null,
1685
  "trial_params": null
1686
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.3147078321810777,
5
+ "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1676
  "eval_samples_per_second": 165.289,
1677
  "eval_steps_per_second": 20.666,
1678
  "step": 130000
1679
+ },
1680
+ {
1681
+ "epoch": 3.09,
1682
+ "learning_rate": 2.265530195409919e-05,
1683
+ "loss": 2.7526,
1684
+ "step": 130500
1685
+ },
1686
+ {
1687
+ "epoch": 3.1,
1688
+ "learning_rate": 2.2592164662057647e-05,
1689
+ "loss": 2.7588,
1690
+ "step": 131000
1691
+ },
1692
+ {
1693
+ "epoch": 3.11,
1694
+ "learning_rate": 2.2529027370016102e-05,
1695
+ "loss": 2.7342,
1696
+ "step": 131500
1697
+ },
1698
+ {
1699
+ "epoch": 3.13,
1700
+ "learning_rate": 2.246589007797456e-05,
1701
+ "loss": 2.7385,
1702
+ "step": 132000
1703
+ },
1704
+ {
1705
+ "epoch": 3.14,
1706
+ "learning_rate": 2.2402752785933015e-05,
1707
+ "loss": 2.7368,
1708
+ "step": 132500
1709
+ },
1710
+ {
1711
+ "epoch": 3.15,
1712
+ "learning_rate": 2.2339615493891467e-05,
1713
+ "loss": 2.7504,
1714
+ "step": 133000
1715
+ },
1716
+ {
1717
+ "epoch": 3.16,
1718
+ "learning_rate": 2.2276478201849922e-05,
1719
+ "loss": 2.7547,
1720
+ "step": 133500
1721
+ },
1722
+ {
1723
+ "epoch": 3.17,
1724
+ "learning_rate": 2.2213340909808377e-05,
1725
+ "loss": 2.7647,
1726
+ "step": 134000
1727
+ },
1728
+ {
1729
+ "epoch": 3.18,
1730
+ "learning_rate": 2.2150203617766836e-05,
1731
+ "loss": 2.7666,
1732
+ "step": 134500
1733
+ },
1734
+ {
1735
+ "epoch": 3.2,
1736
+ "learning_rate": 2.208706632572529e-05,
1737
+ "loss": 2.7559,
1738
+ "step": 135000
1739
+ },
1740
+ {
1741
+ "epoch": 3.21,
1742
+ "learning_rate": 2.2023929033683746e-05,
1743
+ "loss": 2.7673,
1744
+ "step": 135500
1745
+ },
1746
+ {
1747
+ "epoch": 3.22,
1748
+ "learning_rate": 2.19607917416422e-05,
1749
+ "loss": 2.76,
1750
+ "step": 136000
1751
+ },
1752
+ {
1753
+ "epoch": 3.23,
1754
+ "learning_rate": 2.189765444960066e-05,
1755
+ "loss": 2.7574,
1756
+ "step": 136500
1757
+ },
1758
+ {
1759
+ "epoch": 3.24,
1760
+ "learning_rate": 2.1834517157559115e-05,
1761
+ "loss": 2.7698,
1762
+ "step": 137000
1763
+ },
1764
+ {
1765
+ "epoch": 3.26,
1766
+ "learning_rate": 2.177137986551757e-05,
1767
+ "loss": 2.7619,
1768
+ "step": 137500
1769
+ },
1770
+ {
1771
+ "epoch": 3.27,
1772
+ "learning_rate": 2.1708242573476025e-05,
1773
+ "loss": 2.7511,
1774
+ "step": 138000
1775
+ },
1776
+ {
1777
+ "epoch": 3.28,
1778
+ "learning_rate": 2.1645105281434484e-05,
1779
+ "loss": 2.7578,
1780
+ "step": 138500
1781
+ },
1782
+ {
1783
+ "epoch": 3.29,
1784
+ "learning_rate": 2.158196798939294e-05,
1785
+ "loss": 2.7699,
1786
+ "step": 139000
1787
+ },
1788
+ {
1789
+ "epoch": 3.3,
1790
+ "learning_rate": 2.1518830697351394e-05,
1791
+ "loss": 2.7586,
1792
+ "step": 139500
1793
+ },
1794
+ {
1795
+ "epoch": 3.31,
1796
+ "learning_rate": 2.145569340530985e-05,
1797
+ "loss": 2.761,
1798
+ "step": 140000
1799
+ },
1800
+ {
1801
+ "epoch": 3.31,
1802
+ "eval_loss": 3.1274259090423584,
1803
+ "eval_runtime": 113.556,
1804
+ "eval_samples_per_second": 165.31,
1805
+ "eval_steps_per_second": 20.668,
1806
+ "step": 140000
1807
  }
1808
  ],
1809
  "max_steps": 633540,
1810
  "num_train_epochs": 15,
1811
+ "total_flos": 2.9651152375296e+16,
1812
  "trial_name": null,
1813
  "trial_params": null
1814
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f4b57e2e58483c18b5fe4ec5dd79a83877c71aff1fbb8a57d6941745f468c5e
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff3b25c771db1e8a7cb96a6b9fafacb9b63e8e354d25fc6a16953bbd07d49a3d
3
  size 510398013