akahana committed on
Commit faf8bfe
1 Parent(s): 02a9f46

End of training

README.md CHANGED
@@ -1,9 +1,24 @@
 ---
 tags:
 - generated_from_trainer
+datasets:
+- akahana/GlotCC-V1-jav-Latn
+metrics:
+- accuracy
 model-index:
 - name: roberta-javanese
-  results: []
+  results:
+  - task:
+      name: Masked Language Modeling
+      type: fill-mask
+    dataset:
+      name: akahana/GlotCC-V1-jav-Latn default
+      type: akahana/GlotCC-V1-jav-Latn
+      args: default
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.5187187058672487
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # roberta-javanese
 
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
+This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.9966
+- Accuracy: 0.5187
 
 ## Model description
 
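The updated card tags the model for the fill-mask task. A minimal usage sketch, assuming the model is published under the committer's namespace as `akahana/roberta-javanese` (the base-model link in the card is empty, so the starting checkpoint stays unknown):

```python
from transformers import pipeline

# Hypothetical repo id inferred from the committer and model name in the card.
fill_mask = pipeline("fill-mask", model="akahana/roberta-javanese")

# RoBERTa tokenizers use <mask> as the mask token; the sentence is Javanese
# ("I want to <mask> to the market").
for pred in fill_mask("Aku arep <mask> menyang pasar."):
    print(pred["token_str"], round(pred["score"], 4))
```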
all_results.json CHANGED
@@ -1,16 +1,16 @@
 {
-    "epoch": 25.0,
-    "eval_accuracy": 0.5023109513416356,
-    "eval_loss": 3.115068197250366,
-    "eval_runtime": 29.8729,
+    "epoch": 30.0,
+    "eval_accuracy": 0.5187187058672487,
+    "eval_loss": 2.996563196182251,
+    "eval_runtime": 30.3359,
     "eval_samples": 4053,
-    "eval_samples_per_second": 135.675,
-    "eval_steps_per_second": 33.944,
-    "perplexity": 22.53496712388723,
-    "total_flos": 1.319930762508864e+17,
-    "train_loss": 0.6150265672133651,
-    "train_runtime": 7682.6614,
+    "eval_samples_per_second": 133.604,
+    "eval_steps_per_second": 33.426,
+    "perplexity": 20.01662535880206,
+    "total_flos": 1.5839169150106368e+17,
+    "train_loss": 0.47119966579742084,
+    "train_runtime": 6930.0607,
     "train_samples": 80219,
-    "train_samples_per_second": 261.039,
-    "train_steps_per_second": 16.316
+    "train_samples_per_second": 347.265,
+    "train_steps_per_second": 21.705
 }
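The reported perplexity is the exponential of the evaluation loss (the standard masked-LM convention, as computed by HF's `run_mlm.py`). A quick check confirms the two updated numbers are consistent:

```python
import math

eval_loss = 2.996563196182251
print(math.exp(eval_loss))  # ~20.0166, matching the reported 20.01662535880206
```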
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
-    "epoch": 25.0,
-    "eval_accuracy": 0.5023109513416356,
-    "eval_loss": 3.115068197250366,
-    "eval_runtime": 29.8729,
+    "epoch": 30.0,
+    "eval_accuracy": 0.5187187058672487,
+    "eval_loss": 2.996563196182251,
+    "eval_runtime": 30.3359,
     "eval_samples": 4053,
-    "eval_samples_per_second": 135.675,
-    "eval_steps_per_second": 33.944,
-    "perplexity": 22.53496712388723
+    "eval_samples_per_second": 133.604,
+    "eval_steps_per_second": 33.426,
+    "perplexity": 20.01662535880206
 }
runs/Jul16_23-39-50_1f9b0cd15cbb/events.out.tfevents.1721180281.1f9b0cd15cbb.1376.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:690947851e0d8d29fd488745b8000d62a6dd22a837afe3681da84f72ae3418c6
+size 417
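The added TensorBoard event file is stored as a Git LFS pointer, so only the oid and size live in the git history. A sketch of fetching the real 417-byte file, again assuming the hypothetical repo id `akahana/roberta-javanese`:

```python
from huggingface_hub import hf_hub_download

# Resolves the LFS pointer and downloads the actual event file to the local cache.
path = hf_hub_download(
    repo_id="akahana/roberta-javanese",  # assumed repo id
    filename="runs/Jul16_23-39-50_1f9b0cd15cbb/events.out.tfevents.1721180281.1f9b0cd15cbb.1376.1",
)
print(path)
```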
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 25.0,
-    "total_flos": 1.319930762508864e+17,
-    "train_loss": 0.6150265672133651,
-    "train_runtime": 7682.6614,
+    "epoch": 30.0,
+    "total_flos": 1.5839169150106368e+17,
+    "train_loss": 0.47119966579742084,
+    "train_runtime": 6930.0607,
     "train_samples": 80219,
-    "train_samples_per_second": 261.039,
-    "train_steps_per_second": 16.316
+    "train_samples_per_second": 347.265,
+    "train_steps_per_second": 21.705
 }
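The throughput fields follow arithmetically from the other values: the Trainer divides the total step and sample counts for all 30 epochs by the runtime of this (resumed) run, which is why both rates jumped even though the runtime fell. A quick check, assuming that convention:

```python
train_runtime = 6930.0607   # seconds of this run only
max_steps     = 150420      # 30 epochs x 5014 steps/epoch
train_samples = 80219

print(max_steps / train_runtime)           # ~21.705  -> train_steps_per_second
print(train_samples * 30 / train_runtime)  # ~347.27  -> train_samples_per_second
```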
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 25.0,
+  "epoch": 30.0,
   "eval_steps": 500,
-  "global_step": 125350,
+  "global_step": 150420,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1802,12 +1802,371 @@
       "train_runtime": 7682.6614,
       "train_samples_per_second": 261.039,
       "train_steps_per_second": 16.316
+    },
+    {
+      "epoch": 25.02991623454328,
+      "grad_norm": 6.8266754150390625,
+      "learning_rate": 4.9950139609094536e-05,
+      "loss": 2.9041,
+      "step": 125500
+    },
+    {
+      "epoch": 25.12963701635421,
+      "grad_norm": 7.047895431518555,
+      "learning_rate": 4.9783938306076325e-05,
+      "loss": 2.9501,
+      "step": 126000
+    },
+    {
+      "epoch": 25.229357798165136,
+      "grad_norm": 6.489243507385254,
+      "learning_rate": 4.9617737003058106e-05,
+      "loss": 2.9795,
+      "step": 126500
+    },
+    {
+      "epoch": 25.329078579976066,
+      "grad_norm": 6.933114528656006,
+      "learning_rate": 4.9451535700039895e-05,
+      "loss": 2.9906,
+      "step": 127000
+    },
+    {
+      "epoch": 25.428799361786997,
+      "grad_norm": 7.721564769744873,
+      "learning_rate": 4.9285334397021676e-05,
+      "loss": 2.9822,
+      "step": 127500
+    },
+    {
+      "epoch": 25.528520143597927,
+      "grad_norm": 7.604334831237793,
+      "learning_rate": 4.911913309400346e-05,
+      "loss": 2.9751,
+      "step": 128000
+    },
+    {
+      "epoch": 25.628240925408853,
+      "grad_norm": 6.689730644226074,
+      "learning_rate": 4.8952931790985246e-05,
+      "loss": 2.9806,
+      "step": 128500
+    },
+    {
+      "epoch": 25.727961707219784,
+      "grad_norm": 7.001711368560791,
+      "learning_rate": 4.878673048796703e-05,
+      "loss": 2.9701,
+      "step": 129000
+    },
+    {
+      "epoch": 25.827682489030714,
+      "grad_norm": 6.627374649047852,
+      "learning_rate": 4.862052918494881e-05,
+      "loss": 2.982,
+      "step": 129500
+    },
+    {
+      "epoch": 25.927403270841644,
+      "grad_norm": 6.500030517578125,
+      "learning_rate": 4.8454660284536635e-05,
+      "loss": 2.9497,
+      "step": 130000
+    },
+    {
+      "epoch": 26.027124052652574,
+      "grad_norm": 6.908927917480469,
+      "learning_rate": 4.828845898151842e-05,
+      "loss": 2.9201,
+      "step": 130500
+    },
+    {
+      "epoch": 26.1268448344635,
+      "grad_norm": 7.953597068786621,
+      "learning_rate": 4.8122257678500205e-05,
+      "loss": 2.8916,
+      "step": 131000
+    },
+    {
+      "epoch": 26.22656561627443,
+      "grad_norm": 7.111712455749512,
+      "learning_rate": 4.795605637548199e-05,
+      "loss": 2.8983,
+      "step": 131500
+    },
+    {
+      "epoch": 26.32628639808536,
+      "grad_norm": 7.099549293518066,
+      "learning_rate": 4.778985507246377e-05,
+      "loss": 2.8862,
+      "step": 132000
+    },
+    {
+      "epoch": 26.426007179896292,
+      "grad_norm": 6.708031177520752,
+      "learning_rate": 4.762365376944555e-05,
+      "loss": 2.8828,
+      "step": 132500
+    },
+    {
+      "epoch": 26.52572796170722,
+      "grad_norm": 6.638050079345703,
+      "learning_rate": 4.745745246642734e-05,
+      "loss": 2.9,
+      "step": 133000
+    },
+    {
+      "epoch": 26.62544874351815,
+      "grad_norm": 6.474231243133545,
+      "learning_rate": 4.729125116340912e-05,
+      "loss": 2.8729,
+      "step": 133500
+    },
+    {
+      "epoch": 26.72516952532908,
+      "grad_norm": 7.071346759796143,
+      "learning_rate": 4.712538226299694e-05,
+      "loss": 2.878,
+      "step": 134000
+    },
+    {
+      "epoch": 26.82489030714001,
+      "grad_norm": 7.4629740715026855,
+      "learning_rate": 4.695918095997873e-05,
+      "loss": 2.8949,
+      "step": 134500
+    },
+    {
+      "epoch": 26.924611088950936,
+      "grad_norm": 7.166282653808594,
+      "learning_rate": 4.679331205956655e-05,
+      "loss": 2.8834,
+      "step": 135000
+    },
+    {
+      "epoch": 27.024331870761866,
+      "grad_norm": 7.213958263397217,
+      "learning_rate": 4.6627110756548334e-05,
+      "loss": 2.8722,
+      "step": 135500
+    },
+    {
+      "epoch": 27.124052652572797,
+      "grad_norm": 6.917830467224121,
+      "learning_rate": 4.6460909453530116e-05,
+      "loss": 2.812,
+      "step": 136000
+    },
+    {
+      "epoch": 27.223773434383727,
+      "grad_norm": 7.030029296875,
+      "learning_rate": 4.62947081505119e-05,
+      "loss": 2.7973,
+      "step": 136500
+    },
+    {
+      "epoch": 27.323494216194653,
+      "grad_norm": 6.927401542663574,
+      "learning_rate": 4.6128506847493686e-05,
+      "loss": 2.8567,
+      "step": 137000
+    },
+    {
+      "epoch": 27.423214998005584,
+      "grad_norm": 7.063901424407959,
+      "learning_rate": 4.596230554447547e-05,
+      "loss": 2.8119,
+      "step": 137500
+    },
+    {
+      "epoch": 27.522935779816514,
+      "grad_norm": 6.619449138641357,
+      "learning_rate": 4.5796104241457256e-05,
+      "loss": 2.814,
+      "step": 138000
+    },
+    {
+      "epoch": 27.622656561627444,
+      "grad_norm": 6.861698150634766,
+      "learning_rate": 4.562990293843904e-05,
+      "loss": 2.7966,
+      "step": 138500
+    },
+    {
+      "epoch": 27.72237734343837,
+      "grad_norm": 5.698707580566406,
+      "learning_rate": 4.5464034038026856e-05,
+      "loss": 2.8274,
+      "step": 139000
+    },
+    {
+      "epoch": 27.8220981252493,
+      "grad_norm": 6.638801574707031,
+      "learning_rate": 4.5297832735008645e-05,
+      "loss": 2.8111,
+      "step": 139500
+    },
+    {
+      "epoch": 27.92181890706023,
+      "grad_norm": 7.414352893829346,
+      "learning_rate": 4.5131631431990427e-05,
+      "loss": 2.8219,
+      "step": 140000
+    },
+    {
+      "epoch": 28.02153968887116,
+      "grad_norm": 7.000102519989014,
+      "learning_rate": 4.4965430128972215e-05,
+      "loss": 2.8059,
+      "step": 140500
+    },
+    {
+      "epoch": 28.121260470682092,
+      "grad_norm": 7.648940563201904,
+      "learning_rate": 4.4799561228560034e-05,
+      "loss": 2.7801,
+      "step": 141000
+    },
+    {
+      "epoch": 28.22098125249302,
+      "grad_norm": 6.238720417022705,
+      "learning_rate": 4.4633359925541815e-05,
+      "loss": 2.7611,
+      "step": 141500
+    },
+    {
+      "epoch": 28.32070203430395,
+      "grad_norm": 7.083422660827637,
+      "learning_rate": 4.4467491025129634e-05,
+      "loss": 2.7476,
+      "step": 142000
+    },
+    {
+      "epoch": 28.42042281611488,
+      "grad_norm": 7.1048760414123535,
+      "learning_rate": 4.430128972211142e-05,
+      "loss": 2.7601,
+      "step": 142500
+    },
+    {
+      "epoch": 28.52014359792581,
+      "grad_norm": 6.950742244720459,
+      "learning_rate": 4.4135088419093204e-05,
+      "loss": 2.7615,
+      "step": 143000
+    },
+    {
+      "epoch": 28.619864379736736,
+      "grad_norm": 7.063054084777832,
+      "learning_rate": 4.396888711607499e-05,
+      "loss": 2.7583,
+      "step": 143500
+    },
+    {
+      "epoch": 28.719585161547666,
+      "grad_norm": 6.951484680175781,
+      "learning_rate": 4.3802685813056774e-05,
+      "loss": 2.748,
+      "step": 144000
+    },
+    {
+      "epoch": 28.819305943358597,
+      "grad_norm": 7.212677955627441,
+      "learning_rate": 4.363648451003856e-05,
+      "loss": 2.7542,
+      "step": 144500
+    },
+    {
+      "epoch": 28.919026725169527,
+      "grad_norm": 6.691658973693848,
+      "learning_rate": 4.3470283207020344e-05,
+      "loss": 2.753,
+      "step": 145000
+    },
+    {
+      "epoch": 29.018747506980453,
+      "grad_norm": 7.1954874992370605,
+      "learning_rate": 4.330408190400213e-05,
+      "loss": 2.7332,
+      "step": 145500
+    },
+    {
+      "epoch": 29.118468288791384,
+      "grad_norm": 6.654098987579346,
+      "learning_rate": 4.313821300358995e-05,
+      "loss": 2.7109,
+      "step": 146000
+    },
+    {
+      "epoch": 29.218189070602314,
+      "grad_norm": 6.924403667449951,
+      "learning_rate": 4.297201170057173e-05,
+      "loss": 2.7076,
+      "step": 146500
+    },
+    {
+      "epoch": 29.317909852413244,
+      "grad_norm": 7.731849193572998,
+      "learning_rate": 4.280581039755352e-05,
+      "loss": 2.6943,
+      "step": 147000
+    },
+    {
+      "epoch": 29.41763063422417,
+      "grad_norm": 7.095526218414307,
+      "learning_rate": 4.26396090945353e-05,
+      "loss": 2.72,
+      "step": 147500
+    },
+    {
+      "epoch": 29.5173514160351,
+      "grad_norm": 7.1939520835876465,
+      "learning_rate": 4.247340779151709e-05,
+      "loss": 2.6772,
+      "step": 148000
+    },
+    {
+      "epoch": 29.61707219784603,
+      "grad_norm": 7.466503620147705,
+      "learning_rate": 4.230753889110491e-05,
+      "loss": 2.7193,
+      "step": 148500
+    },
+    {
+      "epoch": 29.71679297965696,
+      "grad_norm": 6.902263164520264,
+      "learning_rate": 4.214133758808669e-05,
+      "loss": 2.716,
+      "step": 149000
+    },
+    {
+      "epoch": 29.81651376146789,
+      "grad_norm": 7.366625785827637,
+      "learning_rate": 4.197513628506848e-05,
+      "loss": 2.7009,
+      "step": 149500
+    },
+    {
+      "epoch": 29.91623454327882,
+      "grad_norm": 6.991941452026367,
+      "learning_rate": 4.180893498205026e-05,
+      "loss": 2.7202,
+      "step": 150000
+    },
+    {
+      "epoch": 30.0,
+      "step": 150420,
+      "total_flos": 1.5839169150106368e+17,
+      "train_loss": 0.47119966579742084,
+      "train_runtime": 6930.0607,
+      "train_samples_per_second": 347.265,
+      "train_steps_per_second": 21.705
     }
   ],
   "logging_steps": 500,
-  "max_steps": 125350,
+  "max_steps": 150420,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 25,
+  "num_train_epochs": 30,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -1821,7 +2180,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.319930762508864e+17,
+  "total_flos": 1.5839169150106368e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null