akahana committed on
Commit
61499e9
1 Parent(s): bc2c996

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: distilgpt2-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # distilgpt2-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: distilgpt2-javanese
10
+ results:
11
+ - task:
12
+ name: Causal Language Modeling
13
+ type: text-generation
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.349037028434276
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # distilgpt2-javanese
28
 
29
+ This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 4.1864
32
+ - Accuracy: 0.3490
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.29955480711534366,
4
- "eval_loss": 4.690457820892334,
5
- "eval_runtime": 18.6192,
6
  "eval_samples": 2026,
7
- "eval_samples_per_second": 108.812,
8
- "eval_steps_per_second": 27.23,
9
- "perplexity": 108.9030264768815,
10
- "total_flos": 1.310011259092992e+16,
11
- "train_loss": 5.274392118328446,
12
- "train_runtime": 3806.2907,
13
  "train_samples": 40108,
14
- "train_samples_per_second": 52.686,
15
- "train_steps_per_second": 3.293
16
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.349037028434276,
4
+ "eval_loss": 4.1864213943481445,
5
+ "eval_runtime": 17.3075,
6
  "eval_samples": 2026,
7
+ "eval_samples_per_second": 117.059,
8
+ "eval_steps_per_second": 29.294,
9
+ "perplexity": 65.78694368142702,
10
+ "total_flos": 2.620022518185984e+16,
11
+ "train_loss": 2.0909440845904905,
12
+ "train_runtime": 3914.0913,
13
  "train_samples": 40108,
14
+ "train_samples_per_second": 102.471,
15
+ "train_steps_per_second": 6.405
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.29955480711534366,
4
- "eval_loss": 4.690457820892334,
5
- "eval_runtime": 18.6192,
6
  "eval_samples": 2026,
7
- "eval_samples_per_second": 108.812,
8
- "eval_steps_per_second": 27.23,
9
- "perplexity": 108.9030264768815
10
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.349037028434276,
4
+ "eval_loss": 4.1864213943481445,
5
+ "eval_runtime": 17.3075,
6
  "eval_samples": 2026,
7
+ "eval_samples_per_second": 117.059,
8
+ "eval_steps_per_second": 29.294,
9
+ "perplexity": 65.78694368142702
10
  }
runs/Jul24_00-46-24_73a83f74d69d/events.out.tfevents.1721785958.73a83f74d69d.3048.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c7444f25960ac57f53d145c6210eeadfb497c89628cdb2dd0465835cb8ae181
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 1.310011259092992e+16,
4
- "train_loss": 5.274392118328446,
5
- "train_runtime": 3806.2907,
6
  "train_samples": 40108,
7
- "train_samples_per_second": 52.686,
8
- "train_steps_per_second": 3.293
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 2.620022518185984e+16,
4
+ "train_loss": 2.0909440845904905,
5
+ "train_runtime": 3914.0913,
6
  "train_samples": 40108,
7
+ "train_samples_per_second": 102.471,
8
+ "train_steps_per_second": 6.405
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 12535,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -191,12 +191,196 @@
191
  "train_runtime": 3806.2907,
192
  "train_samples_per_second": 52.686,
193
  "train_steps_per_second": 3.293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  }
195
  ],
196
  "logging_steps": 500,
197
- "max_steps": 12535,
198
  "num_input_tokens_seen": 0,
199
- "num_train_epochs": 5,
200
  "save_steps": 500,
201
  "stateful_callbacks": {
202
  "TrainerControl": {
@@ -210,7 +394,7 @@
210
  "attributes": {}
211
  }
212
  },
213
- "total_flos": 1.310011259092992e+16,
214
  "train_batch_size": 16,
215
  "trial_name": null,
216
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 25070,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
191
  "train_runtime": 3806.2907,
192
  "train_samples_per_second": 52.686,
193
  "train_steps_per_second": 3.293
194
+ },
195
+ {
196
+ "epoch": 5.1854806541683285,
197
+ "grad_norm": 2.7731709480285645,
198
+ "learning_rate": 4.907259672915836e-05,
199
+ "loss": 4.7681,
200
+ "step": 13000
201
+ },
202
+ {
203
+ "epoch": 5.384922217790187,
204
+ "grad_norm": 2.5628647804260254,
205
+ "learning_rate": 4.8075388911049066e-05,
206
+ "loss": 4.7441,
207
+ "step": 13500
208
+ },
209
+ {
210
+ "epoch": 5.584363781412046,
211
+ "grad_norm": 2.7598373889923096,
212
+ "learning_rate": 4.707818109293977e-05,
213
+ "loss": 4.6634,
214
+ "step": 14000
215
+ },
216
+ {
217
+ "epoch": 5.783805345033905,
218
+ "grad_norm": 2.619565486907959,
219
+ "learning_rate": 4.608097327483047e-05,
220
+ "loss": 4.6238,
221
+ "step": 14500
222
+ },
223
+ {
224
+ "epoch": 5.983246908655763,
225
+ "grad_norm": 2.6309878826141357,
226
+ "learning_rate": 4.508376545672118e-05,
227
+ "loss": 4.6015,
228
+ "step": 15000
229
+ },
230
+ {
231
+ "epoch": 6.182688472277623,
232
+ "grad_norm": 2.6872658729553223,
233
+ "learning_rate": 4.4086557638611886e-05,
234
+ "loss": 4.4218,
235
+ "step": 15500
236
+ },
237
+ {
238
+ "epoch": 6.382130035899482,
239
+ "grad_norm": 2.780477285385132,
240
+ "learning_rate": 4.3089349820502596e-05,
241
+ "loss": 4.405,
242
+ "step": 16000
243
+ },
244
+ {
245
+ "epoch": 6.58157159952134,
246
+ "grad_norm": 2.700143575668335,
247
+ "learning_rate": 4.2092142002393306e-05,
248
+ "loss": 4.3785,
249
+ "step": 16500
250
+ },
251
+ {
252
+ "epoch": 6.781013163143199,
253
+ "grad_norm": 2.7577366828918457,
254
+ "learning_rate": 4.1094934184284e-05,
255
+ "loss": 4.3257,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 6.980454726765058,
260
+ "grad_norm": 2.6972854137420654,
261
+ "learning_rate": 4.009772636617471e-05,
262
+ "loss": 4.3124,
263
+ "step": 17500
264
+ },
265
+ {
266
+ "epoch": 7.179896290386917,
267
+ "grad_norm": 2.8049590587615967,
268
+ "learning_rate": 3.9100518548065417e-05,
269
+ "loss": 4.1901,
270
+ "step": 18000
271
+ },
272
+ {
273
+ "epoch": 7.379337854008775,
274
+ "grad_norm": 3.0153963565826416,
275
+ "learning_rate": 3.810331072995613e-05,
276
+ "loss": 4.1466,
277
+ "step": 18500
278
+ },
279
+ {
280
+ "epoch": 7.578779417630634,
281
+ "grad_norm": 3.010798215866089,
282
+ "learning_rate": 3.710610291184683e-05,
283
+ "loss": 4.1345,
284
+ "step": 19000
285
+ },
286
+ {
287
+ "epoch": 7.778220981252493,
288
+ "grad_norm": 2.857867956161499,
289
+ "learning_rate": 3.610889509373754e-05,
290
+ "loss": 4.1093,
291
+ "step": 19500
292
+ },
293
+ {
294
+ "epoch": 7.9776625448743514,
295
+ "grad_norm": 2.9866838455200195,
296
+ "learning_rate": 3.5111687275628244e-05,
297
+ "loss": 4.091,
298
+ "step": 20000
299
+ },
300
+ {
301
+ "epoch": 8.177104108496211,
302
+ "grad_norm": 3.2831859588623047,
303
+ "learning_rate": 3.411447945751895e-05,
304
+ "loss": 3.9812,
305
+ "step": 20500
306
+ },
307
+ {
308
+ "epoch": 8.37654567211807,
309
+ "grad_norm": 3.1285603046417236,
310
+ "learning_rate": 3.311926605504587e-05,
311
+ "loss": 3.955,
312
+ "step": 21000
313
+ },
314
+ {
315
+ "epoch": 8.575987235739928,
316
+ "grad_norm": 2.9974467754364014,
317
+ "learning_rate": 3.212205823693658e-05,
318
+ "loss": 3.9307,
319
+ "step": 21500
320
+ },
321
+ {
322
+ "epoch": 8.775428799361787,
323
+ "grad_norm": 3.0734570026397705,
324
+ "learning_rate": 3.1124850418827286e-05,
325
+ "loss": 3.9186,
326
+ "step": 22000
327
+ },
328
+ {
329
+ "epoch": 8.974870362983646,
330
+ "grad_norm": 3.024569034576416,
331
+ "learning_rate": 3.0129637016354212e-05,
332
+ "loss": 3.9455,
333
+ "step": 22500
334
+ },
335
+ {
336
+ "epoch": 9.174311926605505,
337
+ "grad_norm": 3.05592679977417,
338
+ "learning_rate": 2.9132429198244916e-05,
339
+ "loss": 3.8104,
340
+ "step": 23000
341
+ },
342
+ {
343
+ "epoch": 9.373753490227363,
344
+ "grad_norm": 3.2957406044006348,
345
+ "learning_rate": 2.8135221380135622e-05,
346
+ "loss": 3.8007,
347
+ "step": 23500
348
+ },
349
+ {
350
+ "epoch": 9.573195053849222,
351
+ "grad_norm": 3.398186683654785,
352
+ "learning_rate": 2.7138013562026326e-05,
353
+ "loss": 3.8345,
354
+ "step": 24000
355
+ },
356
+ {
357
+ "epoch": 9.77263661747108,
358
+ "grad_norm": 3.1825666427612305,
359
+ "learning_rate": 2.6140805743917036e-05,
360
+ "loss": 3.7761,
361
+ "step": 24500
362
+ },
363
+ {
364
+ "epoch": 9.97207818109294,
365
+ "grad_norm": 2.9551422595977783,
366
+ "learning_rate": 2.514359792580774e-05,
367
+ "loss": 3.777,
368
+ "step": 25000
369
+ },
370
+ {
371
+ "epoch": 10.0,
372
+ "step": 25070,
373
+ "total_flos": 2.620022518185984e+16,
374
+ "train_loss": 2.0909440845904905,
375
+ "train_runtime": 3914.0913,
376
+ "train_samples_per_second": 102.471,
377
+ "train_steps_per_second": 6.405
378
  }
379
  ],
380
  "logging_steps": 500,
381
+ "max_steps": 25070,
382
  "num_input_tokens_seen": 0,
383
+ "num_train_epochs": 10,
384
  "save_steps": 500,
385
  "stateful_callbacks": {
386
  "TrainerControl": {
 
394
  "attributes": {}
395
  }
396
  },
397
+ "total_flos": 2.620022518185984e+16,
398
  "train_batch_size": 16,
399
  "trial_name": null,
400
  "trial_params": null