akahana committed on
Commit d9f296c
1 Parent(s): 7f48277

End of training

README.md CHANGED
@@ -1,9 +1,24 @@
 ---
 tags:
 - generated_from_trainer
+datasets:
+- akahana/GlotCC-V1-jav-Latn
+metrics:
+- accuracy
 model-index:
 - name: gpt2-javanese
-  results: []
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: akahana/GlotCC-V1-jav-Latn default
+      type: akahana/GlotCC-V1-jav-Latn
+      args: default
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.3642142345585816
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # gpt2-javanese
 
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
+This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
+It achieves the following results on the evaluation set:
+- Loss: 4.0790
+- Accuracy: 0.3642
 
 ## Model description
 
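The updated card reports an eval loss of 4.0790 and token-level accuracy of 0.3642 for the text-generation task. A minimal usage sketch, assuming the checkpoint is published under the repo id `akahana/gpt2-javanese` (inferred from the commit author and the card's `name` field; the diff itself does not state the repo id):

```python
# Sketch only: generate Javanese text with the fine-tuned checkpoint.
# Assumption: the Hub repo id is "akahana/gpt2-javanese"; adjust if it differs.
from transformers import pipeline

generator = pipeline("text-generation", model="akahana/gpt2-javanese")
print(generator("Kutha Yogyakarta iku", max_new_tokens=30)[0]["generated_text"])
```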
all_results.json CHANGED
@@ -1,16 +1,16 @@
 {
-    "epoch": 5.0,
-    "eval_accuracy": 0.3174747885333798,
-    "eval_loss": 4.555253505706787,
-    "eval_runtime": 23.033,
+    "epoch": 10.0,
+    "eval_accuracy": 0.3642142345585816,
+    "eval_loss": 4.079000473022461,
+    "eval_runtime": 23.2829,
     "eval_samples": 2026,
-    "eval_samples_per_second": 87.961,
-    "eval_steps_per_second": 22.012,
-    "perplexity": 95.1308683970671,
-    "total_flos": 2.619975204864e+16,
-    "train_loss": 5.155460170506956,
-    "train_runtime": 6504.3825,
+    "eval_samples_per_second": 87.017,
+    "eval_steps_per_second": 21.776,
+    "perplexity": 59.08638189209135,
+    "total_flos": 5.239950409728e+16,
+    "train_loss": 1.9918543002688176,
+    "train_runtime": 6309.7744,
     "train_samples": 40108,
-    "train_samples_per_second": 30.832,
-    "train_steps_per_second": 1.927
+    "train_samples_per_second": 63.565,
+    "train_steps_per_second": 3.973
 }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
-    "epoch": 5.0,
-    "eval_accuracy": 0.3174747885333798,
-    "eval_loss": 4.555253505706787,
-    "eval_runtime": 23.033,
+    "epoch": 10.0,
+    "eval_accuracy": 0.3642142345585816,
+    "eval_loss": 4.079000473022461,
+    "eval_runtime": 23.2829,
     "eval_samples": 2026,
-    "eval_samples_per_second": 87.961,
-    "eval_steps_per_second": 22.012,
-    "perplexity": 95.1308683970671
+    "eval_samples_per_second": 87.017,
+    "eval_steps_per_second": 21.776,
+    "perplexity": 59.08638189209135
 }
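The perplexity field tracks the eval loss directly: for causal language modeling it is exp(eval_loss), so the drop from 95.13 to 59.09 follows from the loss falling from 4.5553 to 4.0790. A quick check of the new values:

```python
import math

eval_loss = 4.079000473022461          # from eval_results.json
print(math.exp(eval_loss))             # ~59.086, matching the reported "perplexity"
```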
runs/Jul22_00-53-04_850d126be01f/events.out.tfevents.1721616051.850d126be01f.953.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:073bd173b309bd65eac9d86e1ba421aa46d293c7471834b59c24500a7ff1ab0f
+size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 5.0,
-    "total_flos": 2.619975204864e+16,
-    "train_loss": 5.155460170506956,
-    "train_runtime": 6504.3825,
+    "epoch": 10.0,
+    "total_flos": 5.239950409728e+16,
+    "train_loss": 1.9918543002688176,
+    "train_runtime": 6309.7744,
     "train_samples": 40108,
-    "train_samples_per_second": 30.832,
-    "train_steps_per_second": 1.927
+    "train_samples_per_second": 63.565,
+    "train_steps_per_second": 3.973
 }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
+  "epoch": 10.0,
   "eval_steps": 500,
-  "global_step": 12535,
+  "global_step": 25070,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -191,12 +191,196 @@
       "train_runtime": 6504.3825,
       "train_samples_per_second": 30.832,
       "train_steps_per_second": 1.927
+    },
+    {
+      "epoch": 5.1854806541683285,
+      "grad_norm": 2.8765642642974854,
+      "learning_rate": 4.907259672915836e-05,
+      "loss": 4.6174,
+      "step": 13000
+    },
+    {
+      "epoch": 5.384922217790187,
+      "grad_norm": 2.5134449005126953,
+      "learning_rate": 4.8075388911049066e-05,
+      "loss": 4.5994,
+      "step": 13500
+    },
+    {
+      "epoch": 5.584363781412046,
+      "grad_norm": 2.6400318145751953,
+      "learning_rate": 4.707818109293977e-05,
+      "loss": 4.5189,
+      "step": 14000
+    },
+    {
+      "epoch": 5.783805345033905,
+      "grad_norm": 2.5880420207977295,
+      "learning_rate": 4.608097327483047e-05,
+      "loss": 4.4798,
+      "step": 14500
+    },
+    {
+      "epoch": 5.983246908655763,
+      "grad_norm": 2.620410919189453,
+      "learning_rate": 4.508376545672118e-05,
+      "loss": 4.4567,
+      "step": 15000
+    },
+    {
+      "epoch": 6.182688472277623,
+      "grad_norm": 2.8458619117736816,
+      "learning_rate": 4.4086557638611886e-05,
+      "loss": 4.2553,
+      "step": 15500
+    },
+    {
+      "epoch": 6.382130035899482,
+      "grad_norm": 2.748814105987549,
+      "learning_rate": 4.3089349820502596e-05,
+      "loss": 4.2376,
+      "step": 16000
+    },
+    {
+      "epoch": 6.58157159952134,
+      "grad_norm": 2.752399444580078,
+      "learning_rate": 4.2092142002393306e-05,
+      "loss": 4.2098,
+      "step": 16500
+    },
+    {
+      "epoch": 6.781013163143199,
+      "grad_norm": 2.8099758625030518,
+      "learning_rate": 4.1094934184284e-05,
+      "loss": 4.1562,
+      "step": 17000
+    },
+    {
+      "epoch": 6.980454726765058,
+      "grad_norm": 2.6280643939971924,
+      "learning_rate": 4.009772636617471e-05,
+      "loss": 4.1439,
+      "step": 17500
+    },
+    {
+      "epoch": 7.179896290386917,
+      "grad_norm": 2.8597283363342285,
+      "learning_rate": 3.9100518548065417e-05,
+      "loss": 3.9943,
+      "step": 18000
+    },
+    {
+      "epoch": 7.379337854008775,
+      "grad_norm": 3.049448251724243,
+      "learning_rate": 3.810331072995613e-05,
+      "loss": 3.9513,
+      "step": 18500
+    },
+    {
+      "epoch": 7.578779417630634,
+      "grad_norm": 3.0431199073791504,
+      "learning_rate": 3.710610291184683e-05,
+      "loss": 3.9371,
+      "step": 19000
+    },
+    {
+      "epoch": 7.778220981252493,
+      "grad_norm": 2.9101345539093018,
+      "learning_rate": 3.610889509373754e-05,
+      "loss": 3.9142,
+      "step": 19500
+    },
+    {
+      "epoch": 7.9776625448743514,
+      "grad_norm": 3.04560923576355,
+      "learning_rate": 3.5111687275628244e-05,
+      "loss": 3.897,
+      "step": 20000
+    },
+    {
+      "epoch": 8.177104108496211,
+      "grad_norm": 3.3787050247192383,
+      "learning_rate": 3.411447945751895e-05,
+      "loss": 3.7559,
+      "step": 20500
+    },
+    {
+      "epoch": 8.37654567211807,
+      "grad_norm": 3.308147430419922,
+      "learning_rate": 3.311926605504587e-05,
+      "loss": 3.727,
+      "step": 21000
+    },
+    {
+      "epoch": 8.575987235739928,
+      "grad_norm": 3.117141008377075,
+      "learning_rate": 3.212205823693658e-05,
+      "loss": 3.7048,
+      "step": 21500
+    },
+    {
+      "epoch": 8.775428799361787,
+      "grad_norm": 3.0353927612304688,
+      "learning_rate": 3.1124850418827286e-05,
+      "loss": 3.6959,
+      "step": 22000
+    },
+    {
+      "epoch": 8.974870362983646,
+      "grad_norm": 3.1278910636901855,
+      "learning_rate": 3.0127642600717993e-05,
+      "loss": 3.7229,
+      "step": 22500
+    },
+    {
+      "epoch": 9.174311926605505,
+      "grad_norm": 3.2263429164886475,
+      "learning_rate": 2.9132429198244916e-05,
+      "loss": 3.5526,
+      "step": 23000
+    },
+    {
+      "epoch": 9.373753490227363,
+      "grad_norm": 3.4758195877075195,
+      "learning_rate": 2.8135221380135622e-05,
+      "loss": 3.5455,
+      "step": 23500
+    },
+    {
+      "epoch": 9.573195053849222,
+      "grad_norm": 3.5570030212402344,
+      "learning_rate": 2.7138013562026326e-05,
+      "loss": 3.5792,
+      "step": 24000
+    },
+    {
+      "epoch": 9.77263661747108,
+      "grad_norm": 3.397296667098999,
+      "learning_rate": 2.6140805743917036e-05,
+      "loss": 3.5237,
+      "step": 24500
+    },
+    {
+      "epoch": 9.97207818109294,
+      "grad_norm": 3.1818623542785645,
+      "learning_rate": 2.5145592341443958e-05,
+      "loss": 3.5251,
+      "step": 25000
+    },
+    {
+      "epoch": 10.0,
+      "step": 25070,
+      "total_flos": 5.239950409728e+16,
+      "train_loss": 1.9918543002688176,
+      "train_runtime": 6309.7744,
+      "train_samples_per_second": 63.565,
+      "train_steps_per_second": 3.973
     }
   ],
   "logging_steps": 500,
-  "max_steps": 12535,
+  "max_steps": 25070,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
+  "num_train_epochs": 10,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -210,7 +394,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.619975204864e+16,
+  "total_flos": 5.239950409728e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null