akahana committed on
Commit
ac3ef5d
1 Parent(s): ea57034

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.14504326392276684
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # roberta-javanese
28
 
29
+ This model is a fine-tuned version of an unspecified base model (the auto-generated base-model link was empty) on the `default` configuration of the akahana/GlotCC-V1-jav-Latn dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 6.4623
32
+ - Accuracy: 0.1450
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.13537688699797523,
4
- "eval_loss": 6.956112861633301,
5
- "eval_runtime": 31.1351,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 130.174,
8
- "eval_steps_per_second": 32.568,
9
- "perplexity": 1049.5458876332082,
10
- "total_flos": 5279723050035456.0,
11
- "train_loss": 7.35303517095492,
12
- "train_runtime": 1261.9378,
13
  "train_samples": 80219,
14
- "train_samples_per_second": 63.568,
15
- "train_steps_per_second": 3.973
16
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.14504326392276684,
4
+ "eval_loss": 6.46232795715332,
5
+ "eval_runtime": 30.9776,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 130.836,
8
+ "eval_steps_per_second": 32.733,
9
+ "perplexity": 640.550496330922,
10
+ "total_flos": 2.639861525017728e+16,
11
+ "train_loss": 5.285465436767286,
12
+ "train_runtime": 6500.188,
13
  "train_samples": 80219,
14
+ "train_samples_per_second": 61.705,
15
+ "train_steps_per_second": 3.857
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.13537688699797523,
4
- "eval_loss": 6.956112861633301,
5
- "eval_runtime": 31.1351,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 130.174,
8
- "eval_steps_per_second": 32.568,
9
- "perplexity": 1049.5458876332082
10
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.14504326392276684,
4
+ "eval_loss": 6.46232795715332,
5
+ "eval_runtime": 30.9776,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 130.836,
8
+ "eval_steps_per_second": 32.733,
9
+ "perplexity": 640.550496330922
10
  }
runs/Jul12_06-27-18_8b96195604fa/events.out.tfevents.1720772224.8b96195604fa.7935.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:903cc741d7d5ff85b732f774f8dabdccd56a460ff7cb4c633c3e384d9226211e
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 5279723050035456.0,
4
- "train_loss": 7.35303517095492,
5
- "train_runtime": 1261.9378,
6
  "train_samples": 80219,
7
- "train_samples_per_second": 63.568,
8
- "train_steps_per_second": 3.973
9
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 2.639861525017728e+16,
4
+ "train_loss": 5.285465436767286,
5
+ "train_runtime": 6500.188,
6
  "train_samples": 80219,
7
+ "train_samples_per_second": 61.705,
8
+ "train_steps_per_second": 3.857
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 5014,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -79,19 +79,299 @@
79
  "step": 5000
80
  },
81
  {
82
- "epoch": 1.0,
83
- "step": 5014,
84
- "total_flos": 5279723050035456.0,
85
- "train_loss": 7.35303517095492,
86
- "train_runtime": 1261.9378,
87
- "train_samples_per_second": 63.568,
88
- "train_steps_per_second": 3.973
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 500,
92
- "max_steps": 5014,
93
  "num_input_tokens_seen": 0,
94
- "num_train_epochs": 1,
95
  "save_steps": 500,
96
  "stateful_callbacks": {
97
  "TrainerControl": {
@@ -105,7 +385,7 @@
105
  "attributes": {}
106
  }
107
  },
108
- "total_flos": 5279723050035456.0,
109
  "train_batch_size": 16,
110
  "trial_name": null,
111
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 25070,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
79
  "step": 5000
80
  },
81
  {
82
+ "epoch": 1.0969285999202234,
83
+ "grad_norm": 3.2722208499908447,
84
+ "learning_rate": 3.903270841643399e-05,
85
+ "loss": 7.0374,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 1.1966493817311528,
90
+ "grad_norm": 5.218217849731445,
91
+ "learning_rate": 3.803550059832469e-05,
92
+ "loss": 7.0289,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 1.2963701635420821,
97
+ "grad_norm": 3.466571807861328,
98
+ "learning_rate": 3.70382927802154e-05,
99
+ "loss": 6.9595,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 1.3960909453530115,
104
+ "grad_norm": 3.688443183898926,
105
+ "learning_rate": 3.6041084962106106e-05,
106
+ "loss": 6.9267,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 1.4958117271639408,
111
+ "grad_norm": 3.0426700115203857,
112
+ "learning_rate": 3.504387714399681e-05,
113
+ "loss": 6.8954,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 1.5955325089748702,
118
+ "grad_norm": 3.7769949436187744,
119
+ "learning_rate": 3.404666932588751e-05,
120
+ "loss": 6.8657,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 1.6952532907857998,
125
+ "grad_norm": 3.0776305198669434,
126
+ "learning_rate": 3.304946150777822e-05,
127
+ "loss": 6.8285,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 1.7949740725967291,
132
+ "grad_norm": 3.350515604019165,
133
+ "learning_rate": 3.2052253689668926e-05,
134
+ "loss": 6.7948,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 1.8946948544076585,
139
+ "grad_norm": 3.393035411834717,
140
+ "learning_rate": 3.1055045871559636e-05,
141
+ "loss": 6.7725,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 1.994415636218588,
146
+ "grad_norm": 3.438401222229004,
147
+ "learning_rate": 3.0057838053450336e-05,
148
+ "loss": 6.7484,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 2.0941364180295174,
153
+ "grad_norm": 4.042023181915283,
154
+ "learning_rate": 2.9060630235341047e-05,
155
+ "loss": 6.6939,
156
+ "step": 10500
157
+ },
158
+ {
159
+ "epoch": 2.193857199840447,
160
+ "grad_norm": 3.3481028079986572,
161
+ "learning_rate": 2.8063422417231757e-05,
162
+ "loss": 6.6854,
163
+ "step": 11000
164
+ },
165
+ {
166
+ "epoch": 2.293577981651376,
167
+ "grad_norm": 3.266961097717285,
168
+ "learning_rate": 2.706820901475868e-05,
169
+ "loss": 6.6555,
170
+ "step": 11500
171
+ },
172
+ {
173
+ "epoch": 2.3932987634623055,
174
+ "grad_norm": 3.215405225753784,
175
+ "learning_rate": 2.607100119664938e-05,
176
+ "loss": 6.6713,
177
+ "step": 12000
178
+ },
179
+ {
180
+ "epoch": 2.493019545273235,
181
+ "grad_norm": 3.380500316619873,
182
+ "learning_rate": 2.507379337854009e-05,
183
+ "loss": 6.6581,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 2.5927403270841642,
188
+ "grad_norm": 3.536166191101074,
189
+ "learning_rate": 2.4076585560430796e-05,
190
+ "loss": 6.5945,
191
+ "step": 13000
192
+ },
193
+ {
194
+ "epoch": 2.6924611088950936,
195
+ "grad_norm": 3.9319474697113037,
196
+ "learning_rate": 2.30793777423215e-05,
197
+ "loss": 6.6057,
198
+ "step": 13500
199
+ },
200
+ {
201
+ "epoch": 2.792181890706023,
202
+ "grad_norm": 4.334239482879639,
203
+ "learning_rate": 2.2084164339848425e-05,
204
+ "loss": 6.5818,
205
+ "step": 14000
206
+ },
207
+ {
208
+ "epoch": 2.8919026725169523,
209
+ "grad_norm": 4.093286514282227,
210
+ "learning_rate": 2.1086956521739132e-05,
211
+ "loss": 6.5732,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 2.9916234543278817,
216
+ "grad_norm": 4.026576995849609,
217
+ "learning_rate": 2.008974870362984e-05,
218
+ "loss": 6.5627,
219
+ "step": 15000
220
+ },
221
+ {
222
+ "epoch": 3.0913442361388115,
223
+ "grad_norm": 3.7285637855529785,
224
+ "learning_rate": 1.9092540885520542e-05,
225
+ "loss": 6.5268,
226
+ "step": 15500
227
+ },
228
+ {
229
+ "epoch": 3.191065017949741,
230
+ "grad_norm": 3.7349226474761963,
231
+ "learning_rate": 1.809533306741125e-05,
232
+ "loss": 6.5388,
233
+ "step": 16000
234
+ },
235
+ {
236
+ "epoch": 3.29078579976067,
237
+ "grad_norm": 3.5330066680908203,
238
+ "learning_rate": 1.7098125249301956e-05,
239
+ "loss": 6.5141,
240
+ "step": 16500
241
+ },
242
+ {
243
+ "epoch": 3.3905065815715996,
244
+ "grad_norm": 3.6961631774902344,
245
+ "learning_rate": 1.6100917431192662e-05,
246
+ "loss": 6.5013,
247
+ "step": 17000
248
+ },
249
+ {
250
+ "epoch": 3.490227363382529,
251
+ "grad_norm": 3.413053274154663,
252
+ "learning_rate": 1.5103709613083367e-05,
253
+ "loss": 6.4932,
254
+ "step": 17500
255
+ },
256
+ {
257
+ "epoch": 3.5899481451934583,
258
+ "grad_norm": 4.584457874298096,
259
+ "learning_rate": 1.4108496210610292e-05,
260
+ "loss": 6.4695,
261
+ "step": 18000
262
+ },
263
+ {
264
+ "epoch": 3.6896689270043876,
265
+ "grad_norm": 3.3078787326812744,
266
+ "learning_rate": 1.3111288392500998e-05,
267
+ "loss": 6.4711,
268
+ "step": 18500
269
+ },
270
+ {
271
+ "epoch": 3.789389708815317,
272
+ "grad_norm": 3.6679279804229736,
273
+ "learning_rate": 1.2114080574391703e-05,
274
+ "loss": 6.466,
275
+ "step": 19000
276
+ },
277
+ {
278
+ "epoch": 3.8891104906262464,
279
+ "grad_norm": 4.358784198760986,
280
+ "learning_rate": 1.1116872756282408e-05,
281
+ "loss": 6.4568,
282
+ "step": 19500
283
+ },
284
+ {
285
+ "epoch": 3.988831272437176,
286
+ "grad_norm": 4.014244556427002,
287
+ "learning_rate": 1.0119664938173115e-05,
288
+ "loss": 6.4536,
289
+ "step": 20000
290
+ },
291
+ {
292
+ "epoch": 4.0885520542481055,
293
+ "grad_norm": 3.8396079540252686,
294
+ "learning_rate": 9.122457120063822e-06,
295
+ "loss": 6.443,
296
+ "step": 20500
297
+ },
298
+ {
299
+ "epoch": 4.188272836059035,
300
+ "grad_norm": 3.850647449493408,
301
+ "learning_rate": 8.125249301954529e-06,
302
+ "loss": 6.4186,
303
+ "step": 21000
304
+ },
305
+ {
306
+ "epoch": 4.287993617869964,
307
+ "grad_norm": 3.829951047897339,
308
+ "learning_rate": 7.128041483845234e-06,
309
+ "loss": 6.4178,
310
+ "step": 21500
311
+ },
312
+ {
313
+ "epoch": 4.387714399680894,
314
+ "grad_norm": 3.5512278079986572,
315
+ "learning_rate": 6.132828081372159e-06,
316
+ "loss": 6.4055,
317
+ "step": 22000
318
+ },
319
+ {
320
+ "epoch": 4.487435181491823,
321
+ "grad_norm": 3.568665027618408,
322
+ "learning_rate": 5.135620263262864e-06,
323
+ "loss": 6.4076,
324
+ "step": 22500
325
+ },
326
+ {
327
+ "epoch": 4.587155963302752,
328
+ "grad_norm": 3.71463942527771,
329
+ "learning_rate": 4.13841244515357e-06,
330
+ "loss": 6.4086,
331
+ "step": 23000
332
+ },
333
+ {
334
+ "epoch": 4.686876745113682,
335
+ "grad_norm": 3.9615983963012695,
336
+ "learning_rate": 3.1412046270442757e-06,
337
+ "loss": 6.4061,
338
+ "step": 23500
339
+ },
340
+ {
341
+ "epoch": 4.786597526924611,
342
+ "grad_norm": 4.0287909507751465,
343
+ "learning_rate": 2.1459912245712007e-06,
344
+ "loss": 6.3772,
345
+ "step": 24000
346
+ },
347
+ {
348
+ "epoch": 4.88631830873554,
349
+ "grad_norm": 4.012565612792969,
350
+ "learning_rate": 1.1487834064619066e-06,
351
+ "loss": 6.3956,
352
+ "step": 24500
353
+ },
354
+ {
355
+ "epoch": 4.98603909054647,
356
+ "grad_norm": 4.36814022064209,
357
+ "learning_rate": 1.515755883526127e-07,
358
+ "loss": 6.3996,
359
+ "step": 25000
360
+ },
361
+ {
362
+ "epoch": 5.0,
363
+ "step": 25070,
364
+ "total_flos": 2.639861525017728e+16,
365
+ "train_loss": 5.285465436767286,
366
+ "train_runtime": 6500.188,
367
+ "train_samples_per_second": 61.705,
368
+ "train_steps_per_second": 3.857
369
  }
370
  ],
371
  "logging_steps": 500,
372
+ "max_steps": 25070,
373
  "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 5,
375
  "save_steps": 500,
376
  "stateful_callbacks": {
377
  "TrainerControl": {
 
385
  "attributes": {}
386
  }
387
  },
388
+ "total_flos": 2.639861525017728e+16,
389
  "train_batch_size": 16,
390
  "trial_name": null,
391
  "trial_params": null