michaelsh committed on
Commit
7b20a26
1 Parent(s): 43c30f7

Upload 8 files

Browse files
Files changed (5) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +144 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5def78fcf35ce8197a0a89b2cddd0d0ab2a7e652f880a0f267e8082c15dcc1
3
  size 189555589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c59972c95dfd9d8dc8d4fe2c4f4cb7ce5fb39537b0161254edca65df091d549b
3
  size 189555589
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:213bef9ea1cb8c00dc924977331618ea4ae9823f2d2b164e92ee138a1a71b505
3
  size 94783376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20449f5387f2e89e2efd4d20957f6763c7f3da6f64fba387947779cacb8f725
3
  size 94783376
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98fe4e41e502c92480c84ccee907b3ce52b631e614a8ed347276406cc29ef887
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f01bdf15074da8661bd2dbcdc6ebeeb9fde104f9841dc471c800d924b1ff1e8
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27fb51ce21283dcc0c99fed0d74cb955b323f5a3c1836121af7be1af641d3763
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a51857a10a112633f3a3bb87a57dcd7db82d34b311ca38f2a90efd0475a73f59
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 0.87,
3
  "best_model_checkpoint": "distilhubert-finetuned-gtzan/checkpoint-1356",
4
- "epoch": 12.0,
5
- "global_step": 1356,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1740,11 +1740,152 @@
1740
  "eval_samples_per_second": 2.675,
1741
  "eval_steps_per_second": 0.348,
1742
  "step": 1356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1743
  }
1744
  ],
1745
  "max_steps": 2260,
1746
  "num_train_epochs": 20,
1747
- "total_flos": 3.6803929647744e+17,
1748
  "trial_name": null,
1749
  "trial_params": null
1750
  }
 
1
  {
2
  "best_metric": 0.87,
3
  "best_model_checkpoint": "distilhubert-finetuned-gtzan/checkpoint-1356",
4
+ "epoch": 13.0,
5
+ "global_step": 1469,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1740
  "eval_samples_per_second": 2.675,
1741
  "eval_steps_per_second": 0.348,
1742
  "step": 1356
1743
+ },
1744
+ {
1745
+ "epoch": 12.04,
1746
+ "learning_rate": 2.227138643067847e-05,
1747
+ "loss": 0.0058,
1748
+ "step": 1360
1749
+ },
1750
+ {
1751
+ "epoch": 12.08,
1752
+ "learning_rate": 2.2148475909537858e-05,
1753
+ "loss": 0.0076,
1754
+ "step": 1365
1755
+ },
1756
+ {
1757
+ "epoch": 12.12,
1758
+ "learning_rate": 2.202556538839725e-05,
1759
+ "loss": 0.0085,
1760
+ "step": 1370
1761
+ },
1762
+ {
1763
+ "epoch": 12.17,
1764
+ "learning_rate": 2.190265486725664e-05,
1765
+ "loss": 0.0068,
1766
+ "step": 1375
1767
+ },
1768
+ {
1769
+ "epoch": 12.21,
1770
+ "learning_rate": 2.177974434611603e-05,
1771
+ "loss": 0.0058,
1772
+ "step": 1380
1773
+ },
1774
+ {
1775
+ "epoch": 12.26,
1776
+ "learning_rate": 2.165683382497542e-05,
1777
+ "loss": 0.0058,
1778
+ "step": 1385
1779
+ },
1780
+ {
1781
+ "epoch": 12.3,
1782
+ "learning_rate": 2.153392330383481e-05,
1783
+ "loss": 0.0059,
1784
+ "step": 1390
1785
+ },
1786
+ {
1787
+ "epoch": 12.35,
1788
+ "learning_rate": 2.14110127826942e-05,
1789
+ "loss": 0.006,
1790
+ "step": 1395
1791
+ },
1792
+ {
1793
+ "epoch": 12.39,
1794
+ "learning_rate": 2.1288102261553592e-05,
1795
+ "loss": 0.0053,
1796
+ "step": 1400
1797
+ },
1798
+ {
1799
+ "epoch": 12.43,
1800
+ "learning_rate": 2.116519174041298e-05,
1801
+ "loss": 0.0064,
1802
+ "step": 1405
1803
+ },
1804
+ {
1805
+ "epoch": 12.48,
1806
+ "learning_rate": 2.104228121927237e-05,
1807
+ "loss": 0.0054,
1808
+ "step": 1410
1809
+ },
1810
+ {
1811
+ "epoch": 12.52,
1812
+ "learning_rate": 2.091937069813176e-05,
1813
+ "loss": 0.0979,
1814
+ "step": 1415
1815
+ },
1816
+ {
1817
+ "epoch": 12.57,
1818
+ "learning_rate": 2.079646017699115e-05,
1819
+ "loss": 0.0052,
1820
+ "step": 1420
1821
+ },
1822
+ {
1823
+ "epoch": 12.61,
1824
+ "learning_rate": 2.0673549655850542e-05,
1825
+ "loss": 0.0049,
1826
+ "step": 1425
1827
+ },
1828
+ {
1829
+ "epoch": 12.65,
1830
+ "learning_rate": 2.055063913470993e-05,
1831
+ "loss": 0.0051,
1832
+ "step": 1430
1833
+ },
1834
+ {
1835
+ "epoch": 12.7,
1836
+ "learning_rate": 2.0427728613569323e-05,
1837
+ "loss": 0.0053,
1838
+ "step": 1435
1839
+ },
1840
+ {
1841
+ "epoch": 12.74,
1842
+ "learning_rate": 2.030481809242871e-05,
1843
+ "loss": 0.0095,
1844
+ "step": 1440
1845
+ },
1846
+ {
1847
+ "epoch": 12.79,
1848
+ "learning_rate": 2.0181907571288103e-05,
1849
+ "loss": 0.005,
1850
+ "step": 1445
1851
+ },
1852
+ {
1853
+ "epoch": 12.83,
1854
+ "learning_rate": 2.0058997050147492e-05,
1855
+ "loss": 0.0048,
1856
+ "step": 1450
1857
+ },
1858
+ {
1859
+ "epoch": 12.88,
1860
+ "learning_rate": 1.9936086529006884e-05,
1861
+ "loss": 0.0045,
1862
+ "step": 1455
1863
+ },
1864
+ {
1865
+ "epoch": 12.92,
1866
+ "learning_rate": 1.9813176007866273e-05,
1867
+ "loss": 0.0048,
1868
+ "step": 1460
1869
+ },
1870
+ {
1871
+ "epoch": 12.96,
1872
+ "learning_rate": 1.9690265486725665e-05,
1873
+ "loss": 0.005,
1874
+ "step": 1465
1875
+ },
1876
+ {
1877
+ "epoch": 13.0,
1878
+ "eval_accuracy": 0.87,
1879
+ "eval_loss": 0.7023229002952576,
1880
+ "eval_runtime": 37.9815,
1881
+ "eval_samples_per_second": 2.633,
1882
+ "eval_steps_per_second": 0.342,
1883
+ "step": 1469
1884
  }
1885
  ],
1886
  "max_steps": 2260,
1887
  "num_train_epochs": 20,
1888
+ "total_flos": 3.9870923785056e+17,
1889
  "trial_name": null,
1890
  "trial_params": null
1891
  }