hoangdeeptry commited on
Commit
1ff0aea
1 Parent(s): 8139f4e

End of training

Browse files
last-checkpoint/generation_config.json → generation_config.json RENAMED
File without changes
last-checkpoint/config.json DELETED
@@ -1,52 +0,0 @@
1
- {
2
- "_name_or_path": "duytran3112/whisper-sm-vivos",
3
- "activation_dropout": 0.0,
4
- "activation_function": "gelu",
5
- "apply_spec_augment": false,
6
- "architectures": [
7
- "WhisperForConditionalGeneration"
8
- ],
9
- "attention_dropout": 0.0,
10
- "begin_suppress_tokens": [
11
- 220,
12
- 50257
13
- ],
14
- "bos_token_id": 50257,
15
- "classifier_proj_size": 256,
16
- "d_model": 768,
17
- "decoder_attention_heads": 12,
18
- "decoder_ffn_dim": 3072,
19
- "decoder_layerdrop": 0.0,
20
- "decoder_layers": 12,
21
- "decoder_start_token_id": 50258,
22
- "dropout": 0.0,
23
- "encoder_attention_heads": 12,
24
- "encoder_ffn_dim": 3072,
25
- "encoder_layerdrop": 0.0,
26
- "encoder_layers": 12,
27
- "eos_token_id": 50257,
28
- "forced_decoder_ids": null,
29
- "init_std": 0.02,
30
- "is_encoder_decoder": true,
31
- "mask_feature_length": 10,
32
- "mask_feature_min_masks": 0,
33
- "mask_feature_prob": 0.0,
34
- "mask_time_length": 10,
35
- "mask_time_min_masks": 2,
36
- "mask_time_prob": 0.05,
37
- "max_length": 448,
38
- "max_source_positions": 1500,
39
- "max_target_positions": 448,
40
- "median_filter_width": 7,
41
- "model_type": "whisper",
42
- "num_hidden_layers": 12,
43
- "num_mel_bins": 80,
44
- "pad_token_id": 50257,
45
- "scale_embedding": false,
46
- "suppress_tokens": [],
47
- "torch_dtype": "float32",
48
- "transformers_version": "4.31.0",
49
- "use_cache": true,
50
- "use_weighted_layer_sum": false,
51
- "vocab_size": 51865
52
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5948a7ee44e31ea06a7e61c4b6e04d8656f582d82f6fb80834e5461835499046
3
- size 1934161157
 
 
 
 
last-checkpoint/preprocessor_config.json DELETED
@@ -1,14 +0,0 @@
1
- {
2
- "chunk_length": 30,
3
- "feature_extractor_type": "WhisperFeatureExtractor",
4
- "feature_size": 80,
5
- "hop_length": 160,
6
- "n_fft": 400,
7
- "n_samples": 480000,
8
- "nb_max_frames": 3000,
9
- "padding_side": "right",
10
- "padding_value": 0.0,
11
- "processor_class": "WhisperProcessor",
12
- "return_attention_mask": false,
13
- "sampling_rate": 16000
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4b876019bf694adee9c9238f3e1357059ae51ca004fd16638879e785129283b
3
- size 967102729
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fea25640b33473c2c750c09b44cf70fb34a378c8114c19ff3af953a9b65fac26
3
- size 14575
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3be3e16e7a5628d50352078d8c3f4228325e660ea5cf0f4ec5b03b9a13573f4c
3
- size 627
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,1016 +0,0 @@
1
- {
2
- "best_metric": 104.12573673870334,
3
- "best_model_checkpoint": "./whisper-vietnamese-3/checkpoint-4000",
4
- "epoch": 28.776978417266186,
5
- "global_step": 4000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.18,
12
- "learning_rate": 4.6000000000000004e-07,
13
- "loss": 2.8534,
14
- "step": 25
15
- },
16
- {
17
- "epoch": 0.36,
18
- "learning_rate": 9.600000000000001e-07,
19
- "loss": 2.0345,
20
- "step": 50
21
- },
22
- {
23
- "epoch": 0.54,
24
- "learning_rate": 1.46e-06,
25
- "loss": 1.4058,
26
- "step": 75
27
- },
28
- {
29
- "epoch": 0.72,
30
- "learning_rate": 1.9600000000000003e-06,
31
- "loss": 1.0909,
32
- "step": 100
33
- },
34
- {
35
- "epoch": 0.9,
36
- "learning_rate": 2.46e-06,
37
- "loss": 0.889,
38
- "step": 125
39
- },
40
- {
41
- "epoch": 1.08,
42
- "learning_rate": 2.96e-06,
43
- "loss": 0.7838,
44
- "step": 150
45
- },
46
- {
47
- "epoch": 1.26,
48
- "learning_rate": 3.46e-06,
49
- "loss": 0.7152,
50
- "step": 175
51
- },
52
- {
53
- "epoch": 1.44,
54
- "learning_rate": 3.96e-06,
55
- "loss": 0.6819,
56
- "step": 200
57
- },
58
- {
59
- "epoch": 1.62,
60
- "learning_rate": 4.4600000000000005e-06,
61
- "loss": 0.6581,
62
- "step": 225
63
- },
64
- {
65
- "epoch": 1.8,
66
- "learning_rate": 4.960000000000001e-06,
67
- "loss": 0.6276,
68
- "step": 250
69
- },
70
- {
71
- "epoch": 1.98,
72
- "learning_rate": 5.460000000000001e-06,
73
- "loss": 0.6055,
74
- "step": 275
75
- },
76
- {
77
- "epoch": 2.16,
78
- "learning_rate": 5.9600000000000005e-06,
79
- "loss": 0.5197,
80
- "step": 300
81
- },
82
- {
83
- "epoch": 2.34,
84
- "learning_rate": 6.460000000000001e-06,
85
- "loss": 0.4698,
86
- "step": 325
87
- },
88
- {
89
- "epoch": 2.52,
90
- "learning_rate": 6.96e-06,
91
- "loss": 0.4506,
92
- "step": 350
93
- },
94
- {
95
- "epoch": 2.7,
96
- "learning_rate": 7.4600000000000006e-06,
97
- "loss": 0.427,
98
- "step": 375
99
- },
100
- {
101
- "epoch": 2.88,
102
- "learning_rate": 7.960000000000002e-06,
103
- "loss": 0.3566,
104
- "step": 400
105
- },
106
- {
107
- "epoch": 3.06,
108
- "learning_rate": 8.46e-06,
109
- "loss": 0.3178,
110
- "step": 425
111
- },
112
- {
113
- "epoch": 3.24,
114
- "learning_rate": 8.96e-06,
115
- "loss": 0.2256,
116
- "step": 450
117
- },
118
- {
119
- "epoch": 3.42,
120
- "learning_rate": 9.460000000000001e-06,
121
- "loss": 0.2437,
122
- "step": 475
123
- },
124
- {
125
- "epoch": 3.6,
126
- "learning_rate": 9.960000000000001e-06,
127
- "loss": 0.2447,
128
- "step": 500
129
- },
130
- {
131
- "epoch": 3.78,
132
- "learning_rate": 9.934285714285715e-06,
133
- "loss": 0.2431,
134
- "step": 525
135
- },
136
- {
137
- "epoch": 3.96,
138
- "learning_rate": 9.862857142857144e-06,
139
- "loss": 0.2328,
140
- "step": 550
141
- },
142
- {
143
- "epoch": 4.14,
144
- "learning_rate": 9.791428571428571e-06,
145
- "loss": 0.1619,
146
- "step": 575
147
- },
148
- {
149
- "epoch": 4.32,
150
- "learning_rate": 9.72e-06,
151
- "loss": 0.1336,
152
- "step": 600
153
- },
154
- {
155
- "epoch": 4.5,
156
- "learning_rate": 9.648571428571429e-06,
157
- "loss": 0.1345,
158
- "step": 625
159
- },
160
- {
161
- "epoch": 4.68,
162
- "learning_rate": 9.577142857142858e-06,
163
- "loss": 0.1369,
164
- "step": 650
165
- },
166
- {
167
- "epoch": 4.86,
168
- "learning_rate": 9.505714285714287e-06,
169
- "loss": 0.1368,
170
- "step": 675
171
- },
172
- {
173
- "epoch": 5.04,
174
- "learning_rate": 9.434285714285714e-06,
175
- "loss": 0.1359,
176
- "step": 700
177
- },
178
- {
179
- "epoch": 5.22,
180
- "learning_rate": 9.362857142857143e-06,
181
- "loss": 0.0739,
182
- "step": 725
183
- },
184
- {
185
- "epoch": 5.4,
186
- "learning_rate": 9.291428571428572e-06,
187
- "loss": 0.0754,
188
- "step": 750
189
- },
190
- {
191
- "epoch": 5.58,
192
- "learning_rate": 9.220000000000002e-06,
193
- "loss": 0.0755,
194
- "step": 775
195
- },
196
- {
197
- "epoch": 5.76,
198
- "learning_rate": 9.148571428571429e-06,
199
- "loss": 0.0732,
200
- "step": 800
201
- },
202
- {
203
- "epoch": 5.94,
204
- "learning_rate": 9.077142857142858e-06,
205
- "loss": 0.07,
206
- "step": 825
207
- },
208
- {
209
- "epoch": 6.12,
210
- "learning_rate": 9.005714285714287e-06,
211
- "loss": 0.0556,
212
- "step": 850
213
- },
214
- {
215
- "epoch": 6.29,
216
- "learning_rate": 8.934285714285716e-06,
217
- "loss": 0.0374,
218
- "step": 875
219
- },
220
- {
221
- "epoch": 6.47,
222
- "learning_rate": 8.862857142857143e-06,
223
- "loss": 0.0359,
224
- "step": 900
225
- },
226
- {
227
- "epoch": 6.65,
228
- "learning_rate": 8.791428571428572e-06,
229
- "loss": 0.0392,
230
- "step": 925
231
- },
232
- {
233
- "epoch": 6.83,
234
- "learning_rate": 8.720000000000001e-06,
235
- "loss": 0.0409,
236
- "step": 950
237
- },
238
- {
239
- "epoch": 7.01,
240
- "learning_rate": 8.64857142857143e-06,
241
- "loss": 0.0402,
242
- "step": 975
243
- },
244
- {
245
- "epoch": 7.19,
246
- "learning_rate": 8.577142857142858e-06,
247
- "loss": 0.0204,
248
- "step": 1000
249
- },
250
- {
251
- "epoch": 7.19,
252
- "eval_cer": 105.08606186100687,
253
- "eval_loss": 0.4899536669254303,
254
- "eval_runtime": 247.7316,
255
- "eval_samples_per_second": 0.997,
256
- "eval_steps_per_second": 0.125,
257
- "eval_wer": 123.61493123772102,
258
- "step": 1000
259
- },
260
- {
261
- "epoch": 7.37,
262
- "learning_rate": 8.505714285714287e-06,
263
- "loss": 0.0215,
264
- "step": 1025
265
- },
266
- {
267
- "epoch": 7.55,
268
- "learning_rate": 8.434285714285716e-06,
269
- "loss": 0.0214,
270
- "step": 1050
271
- },
272
- {
273
- "epoch": 7.73,
274
- "learning_rate": 8.362857142857143e-06,
275
- "loss": 0.0222,
276
- "step": 1075
277
- },
278
- {
279
- "epoch": 7.91,
280
- "learning_rate": 8.291428571428572e-06,
281
- "loss": 0.0234,
282
- "step": 1100
283
- },
284
- {
285
- "epoch": 8.09,
286
- "learning_rate": 8.220000000000001e-06,
287
- "loss": 0.016,
288
- "step": 1125
289
- },
290
- {
291
- "epoch": 8.27,
292
- "learning_rate": 8.148571428571428e-06,
293
- "loss": 0.0135,
294
- "step": 1150
295
- },
296
- {
297
- "epoch": 8.45,
298
- "learning_rate": 8.077142857142857e-06,
299
- "loss": 0.0144,
300
- "step": 1175
301
- },
302
- {
303
- "epoch": 8.63,
304
- "learning_rate": 8.005714285714286e-06,
305
- "loss": 0.0139,
306
- "step": 1200
307
- },
308
- {
309
- "epoch": 8.81,
310
- "learning_rate": 7.934285714285715e-06,
311
- "loss": 0.0126,
312
- "step": 1225
313
- },
314
- {
315
- "epoch": 8.99,
316
- "learning_rate": 7.862857142857143e-06,
317
- "loss": 0.0146,
318
- "step": 1250
319
- },
320
- {
321
- "epoch": 9.17,
322
- "learning_rate": 7.791428571428572e-06,
323
- "loss": 0.0089,
324
- "step": 1275
325
- },
326
- {
327
- "epoch": 9.35,
328
- "learning_rate": 7.72e-06,
329
- "loss": 0.0081,
330
- "step": 1300
331
- },
332
- {
333
- "epoch": 9.53,
334
- "learning_rate": 7.64857142857143e-06,
335
- "loss": 0.0094,
336
- "step": 1325
337
- },
338
- {
339
- "epoch": 9.71,
340
- "learning_rate": 7.577142857142857e-06,
341
- "loss": 0.0084,
342
- "step": 1350
343
- },
344
- {
345
- "epoch": 9.89,
346
- "learning_rate": 7.505714285714286e-06,
347
- "loss": 0.0075,
348
- "step": 1375
349
- },
350
- {
351
- "epoch": 10.07,
352
- "learning_rate": 7.434285714285715e-06,
353
- "loss": 0.0084,
354
- "step": 1400
355
- },
356
- {
357
- "epoch": 10.25,
358
- "learning_rate": 7.362857142857144e-06,
359
- "loss": 0.0063,
360
- "step": 1425
361
- },
362
- {
363
- "epoch": 10.43,
364
- "learning_rate": 7.291428571428571e-06,
365
- "loss": 0.0057,
366
- "step": 1450
367
- },
368
- {
369
- "epoch": 10.61,
370
- "learning_rate": 7.22e-06,
371
- "loss": 0.0055,
372
- "step": 1475
373
- },
374
- {
375
- "epoch": 10.79,
376
- "learning_rate": 7.148571428571429e-06,
377
- "loss": 0.0062,
378
- "step": 1500
379
- },
380
- {
381
- "epoch": 10.97,
382
- "learning_rate": 7.077142857142858e-06,
383
- "loss": 0.0068,
384
- "step": 1525
385
- },
386
- {
387
- "epoch": 11.15,
388
- "learning_rate": 7.0057142857142865e-06,
389
- "loss": 0.0041,
390
- "step": 1550
391
- },
392
- {
393
- "epoch": 11.33,
394
- "learning_rate": 6.934285714285715e-06,
395
- "loss": 0.0045,
396
- "step": 1575
397
- },
398
- {
399
- "epoch": 11.51,
400
- "learning_rate": 6.862857142857144e-06,
401
- "loss": 0.0047,
402
- "step": 1600
403
- },
404
- {
405
- "epoch": 11.69,
406
- "learning_rate": 6.791428571428572e-06,
407
- "loss": 0.0048,
408
- "step": 1625
409
- },
410
- {
411
- "epoch": 11.87,
412
- "learning_rate": 6.720000000000001e-06,
413
- "loss": 0.0037,
414
- "step": 1650
415
- },
416
- {
417
- "epoch": 12.05,
418
- "learning_rate": 6.648571428571429e-06,
419
- "loss": 0.0034,
420
- "step": 1675
421
- },
422
- {
423
- "epoch": 12.23,
424
- "learning_rate": 6.577142857142857e-06,
425
- "loss": 0.0031,
426
- "step": 1700
427
- },
428
- {
429
- "epoch": 12.41,
430
- "learning_rate": 6.505714285714286e-06,
431
- "loss": 0.003,
432
- "step": 1725
433
- },
434
- {
435
- "epoch": 12.59,
436
- "learning_rate": 6.434285714285715e-06,
437
- "loss": 0.0044,
438
- "step": 1750
439
- },
440
- {
441
- "epoch": 12.77,
442
- "learning_rate": 6.3628571428571426e-06,
443
- "loss": 0.0033,
444
- "step": 1775
445
- },
446
- {
447
- "epoch": 12.95,
448
- "learning_rate": 6.2914285714285716e-06,
449
- "loss": 0.0032,
450
- "step": 1800
451
- },
452
- {
453
- "epoch": 13.13,
454
- "learning_rate": 6.220000000000001e-06,
455
- "loss": 0.0032,
456
- "step": 1825
457
- },
458
- {
459
- "epoch": 13.31,
460
- "learning_rate": 6.14857142857143e-06,
461
- "loss": 0.0027,
462
- "step": 1850
463
- },
464
- {
465
- "epoch": 13.49,
466
- "learning_rate": 6.077142857142858e-06,
467
- "loss": 0.0029,
468
- "step": 1875
469
- },
470
- {
471
- "epoch": 13.67,
472
- "learning_rate": 6.005714285714286e-06,
473
- "loss": 0.0028,
474
- "step": 1900
475
- },
476
- {
477
- "epoch": 13.85,
478
- "learning_rate": 5.934285714285715e-06,
479
- "loss": 0.0023,
480
- "step": 1925
481
- },
482
- {
483
- "epoch": 14.03,
484
- "learning_rate": 5.862857142857143e-06,
485
- "loss": 0.0019,
486
- "step": 1950
487
- },
488
- {
489
- "epoch": 14.21,
490
- "learning_rate": 5.791428571428572e-06,
491
- "loss": 0.0029,
492
- "step": 1975
493
- },
494
- {
495
- "epoch": 14.39,
496
- "learning_rate": 5.72e-06,
497
- "loss": 0.0022,
498
- "step": 2000
499
- },
500
- {
501
- "epoch": 14.39,
502
- "eval_cer": 104.70151047347889,
503
- "eval_loss": 0.5671390295028687,
504
- "eval_runtime": 247.6486,
505
- "eval_samples_per_second": 0.997,
506
- "eval_steps_per_second": 0.125,
507
- "eval_wer": 111.96070726915521,
508
- "step": 2000
509
- },
510
- {
511
- "epoch": 14.57,
512
- "learning_rate": 5.6485714285714285e-06,
513
- "loss": 0.0027,
514
- "step": 2025
515
- },
516
- {
517
- "epoch": 14.75,
518
- "learning_rate": 5.5771428571428575e-06,
519
- "loss": 0.0024,
520
- "step": 2050
521
- },
522
- {
523
- "epoch": 14.93,
524
- "learning_rate": 5.5057142857142865e-06,
525
- "loss": 0.0029,
526
- "step": 2075
527
- },
528
- {
529
- "epoch": 15.11,
530
- "learning_rate": 5.4342857142857155e-06,
531
- "loss": 0.0016,
532
- "step": 2100
533
- },
534
- {
535
- "epoch": 15.29,
536
- "learning_rate": 5.362857142857143e-06,
537
- "loss": 0.0018,
538
- "step": 2125
539
- },
540
- {
541
- "epoch": 15.47,
542
- "learning_rate": 5.291428571428572e-06,
543
- "loss": 0.0018,
544
- "step": 2150
545
- },
546
- {
547
- "epoch": 15.65,
548
- "learning_rate": 5.220000000000001e-06,
549
- "loss": 0.0027,
550
- "step": 2175
551
- },
552
- {
553
- "epoch": 15.83,
554
- "learning_rate": 5.14857142857143e-06,
555
- "loss": 0.0015,
556
- "step": 2200
557
- },
558
- {
559
- "epoch": 16.01,
560
- "learning_rate": 5.077142857142857e-06,
561
- "loss": 0.0016,
562
- "step": 2225
563
- },
564
- {
565
- "epoch": 16.19,
566
- "learning_rate": 5.005714285714286e-06,
567
- "loss": 0.0013,
568
- "step": 2250
569
- },
570
- {
571
- "epoch": 16.37,
572
- "learning_rate": 4.934285714285715e-06,
573
- "loss": 0.002,
574
- "step": 2275
575
- },
576
- {
577
- "epoch": 16.55,
578
- "learning_rate": 4.862857142857143e-06,
579
- "loss": 0.0019,
580
- "step": 2300
581
- },
582
- {
583
- "epoch": 16.73,
584
- "learning_rate": 4.7914285714285715e-06,
585
- "loss": 0.0013,
586
- "step": 2325
587
- },
588
- {
589
- "epoch": 16.91,
590
- "learning_rate": 4.7200000000000005e-06,
591
- "loss": 0.0017,
592
- "step": 2350
593
- },
594
- {
595
- "epoch": 17.09,
596
- "learning_rate": 4.648571428571429e-06,
597
- "loss": 0.0015,
598
- "step": 2375
599
- },
600
- {
601
- "epoch": 17.27,
602
- "learning_rate": 4.577142857142858e-06,
603
- "loss": 0.0013,
604
- "step": 2400
605
- },
606
- {
607
- "epoch": 17.45,
608
- "learning_rate": 4.505714285714286e-06,
609
- "loss": 0.0017,
610
- "step": 2425
611
- },
612
- {
613
- "epoch": 17.63,
614
- "learning_rate": 4.434285714285715e-06,
615
- "loss": 0.0019,
616
- "step": 2450
617
- },
618
- {
619
- "epoch": 17.81,
620
- "learning_rate": 4.362857142857143e-06,
621
- "loss": 0.002,
622
- "step": 2475
623
- },
624
- {
625
- "epoch": 17.99,
626
- "learning_rate": 4.291428571428572e-06,
627
- "loss": 0.0017,
628
- "step": 2500
629
- },
630
- {
631
- "epoch": 18.17,
632
- "learning_rate": 4.22e-06,
633
- "loss": 0.001,
634
- "step": 2525
635
- },
636
- {
637
- "epoch": 18.35,
638
- "learning_rate": 4.148571428571429e-06,
639
- "loss": 0.0009,
640
- "step": 2550
641
- },
642
- {
643
- "epoch": 18.53,
644
- "learning_rate": 4.0771428571428574e-06,
645
- "loss": 0.0012,
646
- "step": 2575
647
- },
648
- {
649
- "epoch": 18.71,
650
- "learning_rate": 4.0057142857142864e-06,
651
- "loss": 0.0016,
652
- "step": 2600
653
- },
654
- {
655
- "epoch": 18.88,
656
- "learning_rate": 3.934285714285715e-06,
657
- "loss": 0.0018,
658
- "step": 2625
659
- },
660
- {
661
- "epoch": 19.06,
662
- "learning_rate": 3.862857142857143e-06,
663
- "loss": 0.0014,
664
- "step": 2650
665
- },
666
- {
667
- "epoch": 19.24,
668
- "learning_rate": 3.7914285714285722e-06,
669
- "loss": 0.0012,
670
- "step": 2675
671
- },
672
- {
673
- "epoch": 19.42,
674
- "learning_rate": 3.7200000000000004e-06,
675
- "loss": 0.001,
676
- "step": 2700
677
- },
678
- {
679
- "epoch": 19.6,
680
- "learning_rate": 3.648571428571429e-06,
681
- "loss": 0.0014,
682
- "step": 2725
683
- },
684
- {
685
- "epoch": 19.78,
686
- "learning_rate": 3.5771428571428576e-06,
687
- "loss": 0.0013,
688
- "step": 2750
689
- },
690
- {
691
- "epoch": 19.96,
692
- "learning_rate": 3.505714285714286e-06,
693
- "loss": 0.0009,
694
- "step": 2775
695
- },
696
- {
697
- "epoch": 20.14,
698
- "learning_rate": 3.4342857142857143e-06,
699
- "loss": 0.0008,
700
- "step": 2800
701
- },
702
- {
703
- "epoch": 20.32,
704
- "learning_rate": 3.3628571428571433e-06,
705
- "loss": 0.0016,
706
- "step": 2825
707
- },
708
- {
709
- "epoch": 20.5,
710
- "learning_rate": 3.2914285714285715e-06,
711
- "loss": 0.0008,
712
- "step": 2850
713
- },
714
- {
715
- "epoch": 20.68,
716
- "learning_rate": 3.2200000000000005e-06,
717
- "loss": 0.0014,
718
- "step": 2875
719
- },
720
- {
721
- "epoch": 20.86,
722
- "learning_rate": 3.1485714285714287e-06,
723
- "loss": 0.001,
724
- "step": 2900
725
- },
726
- {
727
- "epoch": 21.04,
728
- "learning_rate": 3.0771428571428573e-06,
729
- "loss": 0.0009,
730
- "step": 2925
731
- },
732
- {
733
- "epoch": 21.22,
734
- "learning_rate": 3.005714285714286e-06,
735
- "loss": 0.0007,
736
- "step": 2950
737
- },
738
- {
739
- "epoch": 21.4,
740
- "learning_rate": 2.9342857142857144e-06,
741
- "loss": 0.0011,
742
- "step": 2975
743
- },
744
- {
745
- "epoch": 21.58,
746
- "learning_rate": 2.8628571428571435e-06,
747
- "loss": 0.0009,
748
- "step": 3000
749
- },
750
- {
751
- "epoch": 21.58,
752
- "eval_cer": 101.53081033112092,
753
- "eval_loss": 0.6016765236854553,
754
- "eval_runtime": 253.4179,
755
- "eval_samples_per_second": 0.975,
756
- "eval_steps_per_second": 0.122,
757
- "eval_wer": 109.13163064833007,
758
- "step": 3000
759
- },
760
- {
761
- "epoch": 21.76,
762
- "learning_rate": 2.7914285714285716e-06,
763
- "loss": 0.0007,
764
- "step": 3025
765
- },
766
- {
767
- "epoch": 21.94,
768
- "learning_rate": 2.7200000000000002e-06,
769
- "loss": 0.0008,
770
- "step": 3050
771
- },
772
- {
773
- "epoch": 22.12,
774
- "learning_rate": 2.648571428571429e-06,
775
- "loss": 0.0008,
776
- "step": 3075
777
- },
778
- {
779
- "epoch": 22.3,
780
- "learning_rate": 2.5771428571428574e-06,
781
- "loss": 0.0008,
782
- "step": 3100
783
- },
784
- {
785
- "epoch": 22.48,
786
- "learning_rate": 2.5057142857142856e-06,
787
- "loss": 0.0007,
788
- "step": 3125
789
- },
790
- {
791
- "epoch": 22.66,
792
- "learning_rate": 2.4342857142857146e-06,
793
- "loss": 0.0007,
794
- "step": 3150
795
- },
796
- {
797
- "epoch": 22.84,
798
- "learning_rate": 2.362857142857143e-06,
799
- "loss": 0.0007,
800
- "step": 3175
801
- },
802
- {
803
- "epoch": 23.02,
804
- "learning_rate": 2.2914285714285718e-06,
805
- "loss": 0.0008,
806
- "step": 3200
807
- },
808
- {
809
- "epoch": 23.2,
810
- "learning_rate": 2.2200000000000003e-06,
811
- "loss": 0.0007,
812
- "step": 3225
813
- },
814
- {
815
- "epoch": 23.38,
816
- "learning_rate": 2.148571428571429e-06,
817
- "loss": 0.0007,
818
- "step": 3250
819
- },
820
- {
821
- "epoch": 23.56,
822
- "learning_rate": 2.077142857142857e-06,
823
- "loss": 0.0007,
824
- "step": 3275
825
- },
826
- {
827
- "epoch": 23.74,
828
- "learning_rate": 2.0057142857142857e-06,
829
- "loss": 0.0007,
830
- "step": 3300
831
- },
832
- {
833
- "epoch": 23.92,
834
- "learning_rate": 1.9342857142857143e-06,
835
- "loss": 0.0007,
836
- "step": 3325
837
- },
838
- {
839
- "epoch": 24.1,
840
- "learning_rate": 1.8628571428571429e-06,
841
- "loss": 0.0007,
842
- "step": 3350
843
- },
844
- {
845
- "epoch": 24.28,
846
- "learning_rate": 1.7914285714285715e-06,
847
- "loss": 0.0007,
848
- "step": 3375
849
- },
850
- {
851
- "epoch": 24.46,
852
- "learning_rate": 1.72e-06,
853
- "loss": 0.0006,
854
- "step": 3400
855
- },
856
- {
857
- "epoch": 24.64,
858
- "learning_rate": 1.6485714285714289e-06,
859
- "loss": 0.0006,
860
- "step": 3425
861
- },
862
- {
863
- "epoch": 24.82,
864
- "learning_rate": 1.5771428571428574e-06,
865
- "loss": 0.0006,
866
- "step": 3450
867
- },
868
- {
869
- "epoch": 25.0,
870
- "learning_rate": 1.5057142857142858e-06,
871
- "loss": 0.0006,
872
- "step": 3475
873
- },
874
- {
875
- "epoch": 25.18,
876
- "learning_rate": 1.4342857142857144e-06,
877
- "loss": 0.0007,
878
- "step": 3500
879
- },
880
- {
881
- "epoch": 25.36,
882
- "learning_rate": 1.362857142857143e-06,
883
- "loss": 0.0006,
884
- "step": 3525
885
- },
886
- {
887
- "epoch": 25.54,
888
- "learning_rate": 1.2914285714285716e-06,
889
- "loss": 0.0006,
890
- "step": 3550
891
- },
892
- {
893
- "epoch": 25.72,
894
- "learning_rate": 1.2200000000000002e-06,
895
- "loss": 0.0006,
896
- "step": 3575
897
- },
898
- {
899
- "epoch": 25.9,
900
- "learning_rate": 1.1485714285714286e-06,
901
- "loss": 0.0006,
902
- "step": 3600
903
- },
904
- {
905
- "epoch": 26.08,
906
- "learning_rate": 1.0771428571428574e-06,
907
- "loss": 0.0006,
908
- "step": 3625
909
- },
910
- {
911
- "epoch": 26.26,
912
- "learning_rate": 1.0057142857142857e-06,
913
- "loss": 0.0006,
914
- "step": 3650
915
- },
916
- {
917
- "epoch": 26.44,
918
- "learning_rate": 9.342857142857144e-07,
919
- "loss": 0.0006,
920
- "step": 3675
921
- },
922
- {
923
- "epoch": 26.62,
924
- "learning_rate": 8.628571428571429e-07,
925
- "loss": 0.0006,
926
- "step": 3700
927
- },
928
- {
929
- "epoch": 26.8,
930
- "learning_rate": 7.914285714285715e-07,
931
- "loss": 0.0006,
932
- "step": 3725
933
- },
934
- {
935
- "epoch": 26.98,
936
- "learning_rate": 7.2e-07,
937
- "loss": 0.0007,
938
- "step": 3750
939
- },
940
- {
941
- "epoch": 27.16,
942
- "learning_rate": 6.485714285714287e-07,
943
- "loss": 0.0006,
944
- "step": 3775
945
- },
946
- {
947
- "epoch": 27.34,
948
- "learning_rate": 5.771428571428572e-07,
949
- "loss": 0.0006,
950
- "step": 3800
951
- },
952
- {
953
- "epoch": 27.52,
954
- "learning_rate": 5.057142857142858e-07,
955
- "loss": 0.0006,
956
- "step": 3825
957
- },
958
- {
959
- "epoch": 27.7,
960
- "learning_rate": 4.342857142857143e-07,
961
- "loss": 0.0006,
962
- "step": 3850
963
- },
964
- {
965
- "epoch": 27.88,
966
- "learning_rate": 3.6285714285714283e-07,
967
- "loss": 0.0006,
968
- "step": 3875
969
- },
970
- {
971
- "epoch": 28.06,
972
- "learning_rate": 2.914285714285715e-07,
973
- "loss": 0.0006,
974
- "step": 3900
975
- },
976
- {
977
- "epoch": 28.24,
978
- "learning_rate": 2.2e-07,
979
- "loss": 0.0006,
980
- "step": 3925
981
- },
982
- {
983
- "epoch": 28.42,
984
- "learning_rate": 1.4857142857142857e-07,
985
- "loss": 0.0006,
986
- "step": 3950
987
- },
988
- {
989
- "epoch": 28.6,
990
- "learning_rate": 7.714285714285715e-08,
991
- "loss": 0.0006,
992
- "step": 3975
993
- },
994
- {
995
- "epoch": 28.78,
996
- "learning_rate": 5.714285714285715e-09,
997
- "loss": 0.0006,
998
- "step": 4000
999
- },
1000
- {
1001
- "epoch": 28.78,
1002
- "eval_cer": 100.82641572223558,
1003
- "eval_loss": 0.6164063811302185,
1004
- "eval_runtime": 252.3206,
1005
- "eval_samples_per_second": 0.979,
1006
- "eval_steps_per_second": 0.123,
1007
- "eval_wer": 104.12573673870334,
1008
- "step": 4000
1009
- }
1010
- ],
1011
- "max_steps": 4000,
1012
- "num_train_epochs": 29,
1013
- "total_flos": 1.841290288201728e+19,
1014
- "trial_name": null,
1015
- "trial_params": null
1016
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:23859ad4c36f1f528ab725dd42b5fc2ec1b855a2332883c733b13d2ed717dbba
3
- size 4091
 
 
 
 
runs/Aug17_16-46-37_8582233f681d/events.out.tfevents.1692290809.8582233f681d.216.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:888a1289dfa30d5e985b32553043fa759e533438856c0461c16f165bcba19c46
3
- size 31466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:839989bb6c873bea4902f6fe856e323f55da2890cb46f5cd4a0f1c8b89a8df87
3
+ size 31820