JRHuy commited on
Commit
3acd1ca
1 Parent(s): 389e03f

End of training

Browse files
last-checkpoint/generation_config.json → generation_config.json RENAMED
File without changes
last-checkpoint/config.json DELETED
@@ -1,152 +0,0 @@
1
- {
2
- "_name_or_path": "openai/whisper-small",
3
- "activation_dropout": 0.0,
4
- "activation_function": "gelu",
5
- "apply_spec_augment": false,
6
- "architectures": [
7
- "WhisperForConditionalGeneration"
8
- ],
9
- "attention_dropout": 0.0,
10
- "begin_suppress_tokens": [
11
- 220,
12
- 50257
13
- ],
14
- "bos_token_id": 50257,
15
- "classifier_proj_size": 256,
16
- "d_model": 768,
17
- "decoder_attention_heads": 12,
18
- "decoder_ffn_dim": 3072,
19
- "decoder_layerdrop": 0.0,
20
- "decoder_layers": 12,
21
- "decoder_start_token_id": 50258,
22
- "dropout": 0.0,
23
- "encoder_attention_heads": 12,
24
- "encoder_ffn_dim": 3072,
25
- "encoder_layerdrop": 0.0,
26
- "encoder_layers": 12,
27
- "eos_token_id": 50257,
28
- "forced_decoder_ids": [
29
- [
30
- 1,
31
- 50259
32
- ],
33
- [
34
- 2,
35
- 50359
36
- ],
37
- [
38
- 3,
39
- 50363
40
- ]
41
- ],
42
- "init_std": 0.02,
43
- "is_encoder_decoder": true,
44
- "mask_feature_length": 10,
45
- "mask_feature_min_masks": 0,
46
- "mask_feature_prob": 0.0,
47
- "mask_time_length": 10,
48
- "mask_time_min_masks": 2,
49
- "mask_time_prob": 0.05,
50
- "max_length": 448,
51
- "max_source_positions": 1500,
52
- "max_target_positions": 448,
53
- "median_filter_width": 7,
54
- "model_type": "whisper",
55
- "num_hidden_layers": 12,
56
- "num_mel_bins": 80,
57
- "pad_token_id": 50257,
58
- "scale_embedding": false,
59
- "suppress_tokens": [
60
- 1,
61
- 2,
62
- 7,
63
- 8,
64
- 9,
65
- 10,
66
- 14,
67
- 25,
68
- 26,
69
- 27,
70
- 28,
71
- 29,
72
- 31,
73
- 58,
74
- 59,
75
- 60,
76
- 61,
77
- 62,
78
- 63,
79
- 90,
80
- 91,
81
- 92,
82
- 93,
83
- 359,
84
- 503,
85
- 522,
86
- 542,
87
- 873,
88
- 893,
89
- 902,
90
- 918,
91
- 922,
92
- 931,
93
- 1350,
94
- 1853,
95
- 1982,
96
- 2460,
97
- 2627,
98
- 3246,
99
- 3253,
100
- 3268,
101
- 3536,
102
- 3846,
103
- 3961,
104
- 4183,
105
- 4667,
106
- 6585,
107
- 6647,
108
- 7273,
109
- 9061,
110
- 9383,
111
- 10428,
112
- 10929,
113
- 11938,
114
- 12033,
115
- 12331,
116
- 12562,
117
- 13793,
118
- 14157,
119
- 14635,
120
- 15265,
121
- 15618,
122
- 16553,
123
- 16604,
124
- 18362,
125
- 18956,
126
- 20075,
127
- 21675,
128
- 22520,
129
- 26130,
130
- 26161,
131
- 26435,
132
- 28279,
133
- 29464,
134
- 31650,
135
- 32302,
136
- 32470,
137
- 36865,
138
- 42863,
139
- 47425,
140
- 49870,
141
- 50254,
142
- 50258,
143
- 50360,
144
- 50361,
145
- 50362
146
- ],
147
- "torch_dtype": "float32",
148
- "transformers_version": "4.31.0",
149
- "use_cache": false,
150
- "use_weighted_layer_sum": false,
151
- "vocab_size": 51865
152
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2eeaa3e0dd9026beaf9e1275fc1de9ebf175e2cdcb2016072c06c0f3625fab9
3
- size 1934161157
 
 
 
 
last-checkpoint/preprocessor_config.json DELETED
@@ -1,14 +0,0 @@
1
- {
2
- "chunk_length": 30,
3
- "feature_extractor_type": "WhisperFeatureExtractor",
4
- "feature_size": 80,
5
- "hop_length": 160,
6
- "n_fft": 400,
7
- "n_samples": 480000,
8
- "nb_max_frames": 3000,
9
- "padding_side": "right",
10
- "padding_value": 0.0,
11
- "processor_class": "WhisperProcessor",
12
- "return_attention_mask": false,
13
- "sampling_rate": 16000
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0529397fb768bff68c9e2f5a80dadfcde0b2f3be588cb77e11b29df524c2ac23
3
- size 967102729
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb08efb4868a5313ffdecb076fda7bef23ca071e8b0db9ad3e0f977f931a25bb
3
- size 14575
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c05c5f02f34690b6cdc36257cc6f4b53027dee7ea004e5e07023153262401341
3
- size 627
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,1016 +0,0 @@
1
- {
2
- "best_metric": 17.63791763791764,
3
- "best_model_checkpoint": "./whisper-small-vivos/checkpoint-4000",
4
- "epoch": 5.486968449931413,
5
- "global_step": 4000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 4.6000000000000004e-07,
13
- "loss": 3.0428,
14
- "step": 25
15
- },
16
- {
17
- "epoch": 0.07,
18
- "learning_rate": 9.600000000000001e-07,
19
- "loss": 2.6303,
20
- "step": 50
21
- },
22
- {
23
- "epoch": 0.1,
24
- "learning_rate": 1.46e-06,
25
- "loss": 2.0752,
26
- "step": 75
27
- },
28
- {
29
- "epoch": 0.14,
30
- "learning_rate": 1.9600000000000003e-06,
31
- "loss": 1.5733,
32
- "step": 100
33
- },
34
- {
35
- "epoch": 0.17,
36
- "learning_rate": 2.46e-06,
37
- "loss": 1.1017,
38
- "step": 125
39
- },
40
- {
41
- "epoch": 0.21,
42
- "learning_rate": 2.96e-06,
43
- "loss": 0.9,
44
- "step": 150
45
- },
46
- {
47
- "epoch": 0.24,
48
- "learning_rate": 3.46e-06,
49
- "loss": 0.7899,
50
- "step": 175
51
- },
52
- {
53
- "epoch": 0.27,
54
- "learning_rate": 3.96e-06,
55
- "loss": 0.6859,
56
- "step": 200
57
- },
58
- {
59
- "epoch": 0.31,
60
- "learning_rate": 4.4600000000000005e-06,
61
- "loss": 0.6151,
62
- "step": 225
63
- },
64
- {
65
- "epoch": 0.34,
66
- "learning_rate": 4.960000000000001e-06,
67
- "loss": 0.539,
68
- "step": 250
69
- },
70
- {
71
- "epoch": 0.38,
72
- "learning_rate": 5.460000000000001e-06,
73
- "loss": 0.37,
74
- "step": 275
75
- },
76
- {
77
- "epoch": 0.41,
78
- "learning_rate": 5.9600000000000005e-06,
79
- "loss": 0.3106,
80
- "step": 300
81
- },
82
- {
83
- "epoch": 0.45,
84
- "learning_rate": 6.460000000000001e-06,
85
- "loss": 0.2854,
86
- "step": 325
87
- },
88
- {
89
- "epoch": 0.48,
90
- "learning_rate": 6.96e-06,
91
- "loss": 0.2651,
92
- "step": 350
93
- },
94
- {
95
- "epoch": 0.51,
96
- "learning_rate": 7.4600000000000006e-06,
97
- "loss": 0.2473,
98
- "step": 375
99
- },
100
- {
101
- "epoch": 0.55,
102
- "learning_rate": 7.960000000000002e-06,
103
- "loss": 0.2256,
104
- "step": 400
105
- },
106
- {
107
- "epoch": 0.58,
108
- "learning_rate": 8.46e-06,
109
- "loss": 0.2195,
110
- "step": 425
111
- },
112
- {
113
- "epoch": 0.62,
114
- "learning_rate": 8.96e-06,
115
- "loss": 0.2266,
116
- "step": 450
117
- },
118
- {
119
- "epoch": 0.65,
120
- "learning_rate": 9.460000000000001e-06,
121
- "loss": 0.216,
122
- "step": 475
123
- },
124
- {
125
- "epoch": 0.69,
126
- "learning_rate": 9.960000000000001e-06,
127
- "loss": 0.2294,
128
- "step": 500
129
- },
130
- {
131
- "epoch": 0.72,
132
- "learning_rate": 1e-05,
133
- "loss": 0.1918,
134
- "step": 525
135
- },
136
- {
137
- "epoch": 0.75,
138
- "learning_rate": 1e-05,
139
- "loss": 0.2152,
140
- "step": 550
141
- },
142
- {
143
- "epoch": 0.79,
144
- "learning_rate": 1e-05,
145
- "loss": 0.187,
146
- "step": 575
147
- },
148
- {
149
- "epoch": 0.82,
150
- "learning_rate": 1e-05,
151
- "loss": 0.1756,
152
- "step": 600
153
- },
154
- {
155
- "epoch": 0.86,
156
- "learning_rate": 1e-05,
157
- "loss": 0.1862,
158
- "step": 625
159
- },
160
- {
161
- "epoch": 0.89,
162
- "learning_rate": 1e-05,
163
- "loss": 0.1744,
164
- "step": 650
165
- },
166
- {
167
- "epoch": 0.93,
168
- "learning_rate": 1e-05,
169
- "loss": 0.1764,
170
- "step": 675
171
- },
172
- {
173
- "epoch": 0.96,
174
- "learning_rate": 1e-05,
175
- "loss": 0.1662,
176
- "step": 700
177
- },
178
- {
179
- "epoch": 0.99,
180
- "learning_rate": 1e-05,
181
- "loss": 0.1788,
182
- "step": 725
183
- },
184
- {
185
- "epoch": 1.03,
186
- "learning_rate": 1e-05,
187
- "loss": 0.1472,
188
- "step": 750
189
- },
190
- {
191
- "epoch": 1.06,
192
- "learning_rate": 1e-05,
193
- "loss": 0.1381,
194
- "step": 775
195
- },
196
- {
197
- "epoch": 1.1,
198
- "learning_rate": 1e-05,
199
- "loss": 0.1277,
200
- "step": 800
201
- },
202
- {
203
- "epoch": 1.13,
204
- "learning_rate": 1e-05,
205
- "loss": 0.13,
206
- "step": 825
207
- },
208
- {
209
- "epoch": 1.17,
210
- "learning_rate": 1e-05,
211
- "loss": 0.1309,
212
- "step": 850
213
- },
214
- {
215
- "epoch": 1.2,
216
- "learning_rate": 1e-05,
217
- "loss": 0.1279,
218
- "step": 875
219
- },
220
- {
221
- "epoch": 1.23,
222
- "learning_rate": 1e-05,
223
- "loss": 0.1221,
224
- "step": 900
225
- },
226
- {
227
- "epoch": 1.27,
228
- "learning_rate": 1e-05,
229
- "loss": 0.1263,
230
- "step": 925
231
- },
232
- {
233
- "epoch": 1.3,
234
- "learning_rate": 1e-05,
235
- "loss": 0.122,
236
- "step": 950
237
- },
238
- {
239
- "epoch": 1.34,
240
- "learning_rate": 1e-05,
241
- "loss": 0.1251,
242
- "step": 975
243
- },
244
- {
245
- "epoch": 1.37,
246
- "learning_rate": 1e-05,
247
- "loss": 0.1276,
248
- "step": 1000
249
- },
250
- {
251
- "epoch": 1.37,
252
- "eval_cer": 8.6875591223948,
253
- "eval_loss": 0.21370840072631836,
254
- "eval_runtime": 393.7923,
255
- "eval_samples_per_second": 1.93,
256
- "eval_steps_per_second": 0.122,
257
- "eval_wer": 20.616420616420616,
258
- "step": 1000
259
- },
260
- {
261
- "epoch": 1.41,
262
- "learning_rate": 1e-05,
263
- "loss": 0.1184,
264
- "step": 1025
265
- },
266
- {
267
- "epoch": 1.44,
268
- "learning_rate": 1e-05,
269
- "loss": 0.125,
270
- "step": 1050
271
- },
272
- {
273
- "epoch": 1.47,
274
- "learning_rate": 1e-05,
275
- "loss": 0.1163,
276
- "step": 1075
277
- },
278
- {
279
- "epoch": 1.51,
280
- "learning_rate": 1e-05,
281
- "loss": 0.1263,
282
- "step": 1100
283
- },
284
- {
285
- "epoch": 1.54,
286
- "learning_rate": 1e-05,
287
- "loss": 0.1129,
288
- "step": 1125
289
- },
290
- {
291
- "epoch": 1.58,
292
- "learning_rate": 1e-05,
293
- "loss": 0.1199,
294
- "step": 1150
295
- },
296
- {
297
- "epoch": 1.61,
298
- "learning_rate": 1e-05,
299
- "loss": 0.1231,
300
- "step": 1175
301
- },
302
- {
303
- "epoch": 1.65,
304
- "learning_rate": 1e-05,
305
- "loss": 0.1195,
306
- "step": 1200
307
- },
308
- {
309
- "epoch": 1.68,
310
- "learning_rate": 1e-05,
311
- "loss": 0.1145,
312
- "step": 1225
313
- },
314
- {
315
- "epoch": 1.71,
316
- "learning_rate": 1e-05,
317
- "loss": 0.1115,
318
- "step": 1250
319
- },
320
- {
321
- "epoch": 1.75,
322
- "learning_rate": 1e-05,
323
- "loss": 0.117,
324
- "step": 1275
325
- },
326
- {
327
- "epoch": 1.78,
328
- "learning_rate": 1e-05,
329
- "loss": 0.1089,
330
- "step": 1300
331
- },
332
- {
333
- "epoch": 1.82,
334
- "learning_rate": 1e-05,
335
- "loss": 0.1143,
336
- "step": 1325
337
- },
338
- {
339
- "epoch": 1.85,
340
- "learning_rate": 1e-05,
341
- "loss": 0.1195,
342
- "step": 1350
343
- },
344
- {
345
- "epoch": 1.89,
346
- "learning_rate": 1e-05,
347
- "loss": 0.1137,
348
- "step": 1375
349
- },
350
- {
351
- "epoch": 1.92,
352
- "learning_rate": 1e-05,
353
- "loss": 0.1142,
354
- "step": 1400
355
- },
356
- {
357
- "epoch": 1.95,
358
- "learning_rate": 1e-05,
359
- "loss": 0.1022,
360
- "step": 1425
361
- },
362
- {
363
- "epoch": 1.99,
364
- "learning_rate": 1e-05,
365
- "loss": 0.1,
366
- "step": 1450
367
- },
368
- {
369
- "epoch": 2.02,
370
- "learning_rate": 1e-05,
371
- "loss": 0.0854,
372
- "step": 1475
373
- },
374
- {
375
- "epoch": 2.06,
376
- "learning_rate": 1e-05,
377
- "loss": 0.0707,
378
- "step": 1500
379
- },
380
- {
381
- "epoch": 2.09,
382
- "learning_rate": 1e-05,
383
- "loss": 0.0702,
384
- "step": 1525
385
- },
386
- {
387
- "epoch": 2.13,
388
- "learning_rate": 1e-05,
389
- "loss": 0.0647,
390
- "step": 1550
391
- },
392
- {
393
- "epoch": 2.16,
394
- "learning_rate": 1e-05,
395
- "loss": 0.0713,
396
- "step": 1575
397
- },
398
- {
399
- "epoch": 2.19,
400
- "learning_rate": 1e-05,
401
- "loss": 0.0627,
402
- "step": 1600
403
- },
404
- {
405
- "epoch": 2.23,
406
- "learning_rate": 1e-05,
407
- "loss": 0.0678,
408
- "step": 1625
409
- },
410
- {
411
- "epoch": 2.26,
412
- "learning_rate": 1e-05,
413
- "loss": 0.0667,
414
- "step": 1650
415
- },
416
- {
417
- "epoch": 2.3,
418
- "learning_rate": 1e-05,
419
- "loss": 0.0795,
420
- "step": 1675
421
- },
422
- {
423
- "epoch": 2.33,
424
- "learning_rate": 1e-05,
425
- "loss": 0.0608,
426
- "step": 1700
427
- },
428
- {
429
- "epoch": 2.37,
430
- "learning_rate": 1e-05,
431
- "loss": 0.0642,
432
- "step": 1725
433
- },
434
- {
435
- "epoch": 2.4,
436
- "learning_rate": 1e-05,
437
- "loss": 0.0684,
438
- "step": 1750
439
- },
440
- {
441
- "epoch": 2.43,
442
- "learning_rate": 1e-05,
443
- "loss": 0.0676,
444
- "step": 1775
445
- },
446
- {
447
- "epoch": 2.47,
448
- "learning_rate": 1e-05,
449
- "loss": 0.0743,
450
- "step": 1800
451
- },
452
- {
453
- "epoch": 2.5,
454
- "learning_rate": 1e-05,
455
- "loss": 0.0713,
456
- "step": 1825
457
- },
458
- {
459
- "epoch": 2.54,
460
- "learning_rate": 1e-05,
461
- "loss": 0.062,
462
- "step": 1850
463
- },
464
- {
465
- "epoch": 2.57,
466
- "learning_rate": 1e-05,
467
- "loss": 0.0678,
468
- "step": 1875
469
- },
470
- {
471
- "epoch": 2.61,
472
- "learning_rate": 1e-05,
473
- "loss": 0.0668,
474
- "step": 1900
475
- },
476
- {
477
- "epoch": 2.64,
478
- "learning_rate": 1e-05,
479
- "loss": 0.0739,
480
- "step": 1925
481
- },
482
- {
483
- "epoch": 2.67,
484
- "learning_rate": 1e-05,
485
- "loss": 0.0672,
486
- "step": 1950
487
- },
488
- {
489
- "epoch": 2.71,
490
- "learning_rate": 1e-05,
491
- "loss": 0.0653,
492
- "step": 1975
493
- },
494
- {
495
- "epoch": 2.74,
496
- "learning_rate": 1e-05,
497
- "loss": 0.0696,
498
- "step": 2000
499
- },
500
- {
501
- "epoch": 2.74,
502
- "eval_cer": 7.509688444051142,
503
- "eval_loss": 0.217300683259964,
504
- "eval_runtime": 392.9774,
505
- "eval_samples_per_second": 1.934,
506
- "eval_steps_per_second": 0.122,
507
- "eval_wer": 19.08831908831909,
508
- "step": 2000
509
- },
510
- {
511
- "epoch": 2.78,
512
- "learning_rate": 1e-05,
513
- "loss": 0.0816,
514
- "step": 2025
515
- },
516
- {
517
- "epoch": 2.81,
518
- "learning_rate": 1e-05,
519
- "loss": 0.0796,
520
- "step": 2050
521
- },
522
- {
523
- "epoch": 2.85,
524
- "learning_rate": 1e-05,
525
- "loss": 0.0721,
526
- "step": 2075
527
- },
528
- {
529
- "epoch": 2.88,
530
- "learning_rate": 1e-05,
531
- "loss": 0.0716,
532
- "step": 2100
533
- },
534
- {
535
- "epoch": 2.91,
536
- "learning_rate": 1e-05,
537
- "loss": 0.0659,
538
- "step": 2125
539
- },
540
- {
541
- "epoch": 2.95,
542
- "learning_rate": 1e-05,
543
- "loss": 0.0644,
544
- "step": 2150
545
- },
546
- {
547
- "epoch": 2.98,
548
- "learning_rate": 1e-05,
549
- "loss": 0.0653,
550
- "step": 2175
551
- },
552
- {
553
- "epoch": 3.02,
554
- "learning_rate": 1e-05,
555
- "loss": 0.0558,
556
- "step": 2200
557
- },
558
- {
559
- "epoch": 3.05,
560
- "learning_rate": 1e-05,
561
- "loss": 0.043,
562
- "step": 2225
563
- },
564
- {
565
- "epoch": 3.09,
566
- "learning_rate": 1e-05,
567
- "loss": 0.047,
568
- "step": 2250
569
- },
570
- {
571
- "epoch": 3.12,
572
- "learning_rate": 1e-05,
573
- "loss": 0.0355,
574
- "step": 2275
575
- },
576
- {
577
- "epoch": 3.16,
578
- "learning_rate": 1e-05,
579
- "loss": 0.033,
580
- "step": 2300
581
- },
582
- {
583
- "epoch": 3.19,
584
- "learning_rate": 1e-05,
585
- "loss": 0.0357,
586
- "step": 2325
587
- },
588
- {
589
- "epoch": 3.22,
590
- "learning_rate": 1e-05,
591
- "loss": 0.0364,
592
- "step": 2350
593
- },
594
- {
595
- "epoch": 3.26,
596
- "learning_rate": 1e-05,
597
- "loss": 0.0425,
598
- "step": 2375
599
- },
600
- {
601
- "epoch": 3.29,
602
- "learning_rate": 1e-05,
603
- "loss": 0.0398,
604
- "step": 2400
605
- },
606
- {
607
- "epoch": 3.33,
608
- "learning_rate": 1e-05,
609
- "loss": 0.0376,
610
- "step": 2425
611
- },
612
- {
613
- "epoch": 3.36,
614
- "learning_rate": 1e-05,
615
- "loss": 0.0413,
616
- "step": 2450
617
- },
618
- {
619
- "epoch": 3.4,
620
- "learning_rate": 1e-05,
621
- "loss": 0.0374,
622
- "step": 2475
623
- },
624
- {
625
- "epoch": 3.43,
626
- "learning_rate": 1e-05,
627
- "loss": 0.0383,
628
- "step": 2500
629
- },
630
- {
631
- "epoch": 3.46,
632
- "learning_rate": 1e-05,
633
- "loss": 0.0381,
634
- "step": 2525
635
- },
636
- {
637
- "epoch": 3.5,
638
- "learning_rate": 1e-05,
639
- "loss": 0.0466,
640
- "step": 2550
641
- },
642
- {
643
- "epoch": 3.53,
644
- "learning_rate": 1e-05,
645
- "loss": 0.04,
646
- "step": 2575
647
- },
648
- {
649
- "epoch": 3.57,
650
- "learning_rate": 1e-05,
651
- "loss": 0.0452,
652
- "step": 2600
653
- },
654
- {
655
- "epoch": 3.6,
656
- "learning_rate": 1e-05,
657
- "loss": 0.0393,
658
- "step": 2625
659
- },
660
- {
661
- "epoch": 3.64,
662
- "learning_rate": 1e-05,
663
- "loss": 0.0461,
664
- "step": 2650
665
- },
666
- {
667
- "epoch": 3.67,
668
- "learning_rate": 1e-05,
669
- "loss": 0.0403,
670
- "step": 2675
671
- },
672
- {
673
- "epoch": 3.7,
674
- "learning_rate": 1e-05,
675
- "loss": 0.0489,
676
- "step": 2700
677
- },
678
- {
679
- "epoch": 3.74,
680
- "learning_rate": 1e-05,
681
- "loss": 0.0418,
682
- "step": 2725
683
- },
684
- {
685
- "epoch": 3.77,
686
- "learning_rate": 1e-05,
687
- "loss": 0.0389,
688
- "step": 2750
689
- },
690
- {
691
- "epoch": 3.81,
692
- "learning_rate": 1e-05,
693
- "loss": 0.041,
694
- "step": 2775
695
- },
696
- {
697
- "epoch": 3.84,
698
- "learning_rate": 1e-05,
699
- "loss": 0.0394,
700
- "step": 2800
701
- },
702
- {
703
- "epoch": 3.88,
704
- "learning_rate": 1e-05,
705
- "loss": 0.0485,
706
- "step": 2825
707
- },
708
- {
709
- "epoch": 3.91,
710
- "learning_rate": 1e-05,
711
- "loss": 0.0416,
712
- "step": 2850
713
- },
714
- {
715
- "epoch": 3.94,
716
- "learning_rate": 1e-05,
717
- "loss": 0.0501,
718
- "step": 2875
719
- },
720
- {
721
- "epoch": 3.98,
722
- "learning_rate": 1e-05,
723
- "loss": 0.0393,
724
- "step": 2900
725
- },
726
- {
727
- "epoch": 4.01,
728
- "learning_rate": 1e-05,
729
- "loss": 0.0306,
730
- "step": 2925
731
- },
732
- {
733
- "epoch": 4.05,
734
- "learning_rate": 1e-05,
735
- "loss": 0.0265,
736
- "step": 2950
737
- },
738
- {
739
- "epoch": 4.08,
740
- "learning_rate": 1e-05,
741
- "loss": 0.0223,
742
- "step": 2975
743
- },
744
- {
745
- "epoch": 4.12,
746
- "learning_rate": 1e-05,
747
- "loss": 0.0215,
748
- "step": 3000
749
- },
750
- {
751
- "epoch": 4.12,
752
- "eval_cer": 7.079429983827164,
753
- "eval_loss": 0.24198034405708313,
754
- "eval_runtime": 393.3959,
755
- "eval_samples_per_second": 1.932,
756
- "eval_steps_per_second": 0.122,
757
- "eval_wer": 17.98756798756799,
758
- "step": 3000
759
- },
760
- {
761
- "epoch": 4.15,
762
- "learning_rate": 1e-05,
763
- "loss": 0.0289,
764
- "step": 3025
765
- },
766
- {
767
- "epoch": 4.18,
768
- "learning_rate": 1e-05,
769
- "loss": 0.0226,
770
- "step": 3050
771
- },
772
- {
773
- "epoch": 4.22,
774
- "learning_rate": 1e-05,
775
- "loss": 0.0204,
776
- "step": 3075
777
- },
778
- {
779
- "epoch": 4.25,
780
- "learning_rate": 1e-05,
781
- "loss": 0.0213,
782
- "step": 3100
783
- },
784
- {
785
- "epoch": 4.29,
786
- "learning_rate": 1e-05,
787
- "loss": 0.0223,
788
- "step": 3125
789
- },
790
- {
791
- "epoch": 4.32,
792
- "learning_rate": 1e-05,
793
- "loss": 0.0259,
794
- "step": 3150
795
- },
796
- {
797
- "epoch": 4.36,
798
- "learning_rate": 1e-05,
799
- "loss": 0.0207,
800
- "step": 3175
801
- },
802
- {
803
- "epoch": 4.39,
804
- "learning_rate": 1e-05,
805
- "loss": 0.0235,
806
- "step": 3200
807
- },
808
- {
809
- "epoch": 4.42,
810
- "learning_rate": 1e-05,
811
- "loss": 0.0216,
812
- "step": 3225
813
- },
814
- {
815
- "epoch": 4.46,
816
- "learning_rate": 1e-05,
817
- "loss": 0.0209,
818
- "step": 3250
819
- },
820
- {
821
- "epoch": 4.49,
822
- "learning_rate": 1e-05,
823
- "loss": 0.0233,
824
- "step": 3275
825
- },
826
- {
827
- "epoch": 4.53,
828
- "learning_rate": 1e-05,
829
- "loss": 0.028,
830
- "step": 3300
831
- },
832
- {
833
- "epoch": 4.56,
834
- "learning_rate": 1e-05,
835
- "loss": 0.0283,
836
- "step": 3325
837
- },
838
- {
839
- "epoch": 4.6,
840
- "learning_rate": 1e-05,
841
- "loss": 0.0256,
842
- "step": 3350
843
- },
844
- {
845
- "epoch": 4.63,
846
- "learning_rate": 1e-05,
847
- "loss": 0.0238,
848
- "step": 3375
849
- },
850
- {
851
- "epoch": 4.66,
852
- "learning_rate": 1e-05,
853
- "loss": 0.0255,
854
- "step": 3400
855
- },
856
- {
857
- "epoch": 4.7,
858
- "learning_rate": 1e-05,
859
- "loss": 0.0236,
860
- "step": 3425
861
- },
862
- {
863
- "epoch": 4.73,
864
- "learning_rate": 1e-05,
865
- "loss": 0.0244,
866
- "step": 3450
867
- },
868
- {
869
- "epoch": 4.77,
870
- "learning_rate": 1e-05,
871
- "loss": 0.0342,
872
- "step": 3475
873
- },
874
- {
875
- "epoch": 4.8,
876
- "learning_rate": 1e-05,
877
- "loss": 0.022,
878
- "step": 3500
879
- },
880
- {
881
- "epoch": 4.84,
882
- "learning_rate": 1e-05,
883
- "loss": 0.0231,
884
- "step": 3525
885
- },
886
- {
887
- "epoch": 4.87,
888
- "learning_rate": 1e-05,
889
- "loss": 0.024,
890
- "step": 3550
891
- },
892
- {
893
- "epoch": 4.9,
894
- "learning_rate": 1e-05,
895
- "loss": 0.0275,
896
- "step": 3575
897
- },
898
- {
899
- "epoch": 4.94,
900
- "learning_rate": 1e-05,
901
- "loss": 0.0264,
902
- "step": 3600
903
- },
904
- {
905
- "epoch": 4.97,
906
- "learning_rate": 1e-05,
907
- "loss": 0.0285,
908
- "step": 3625
909
- },
910
- {
911
- "epoch": 5.01,
912
- "learning_rate": 1e-05,
913
- "loss": 0.0199,
914
- "step": 3650
915
- },
916
- {
917
- "epoch": 5.04,
918
- "learning_rate": 1e-05,
919
- "loss": 0.0177,
920
- "step": 3675
921
- },
922
- {
923
- "epoch": 5.08,
924
- "learning_rate": 1e-05,
925
- "loss": 0.0154,
926
- "step": 3700
927
- },
928
- {
929
- "epoch": 5.11,
930
- "learning_rate": 1e-05,
931
- "loss": 0.0126,
932
- "step": 3725
933
- },
934
- {
935
- "epoch": 5.14,
936
- "learning_rate": 1e-05,
937
- "loss": 0.0118,
938
- "step": 3750
939
- },
940
- {
941
- "epoch": 5.18,
942
- "learning_rate": 1e-05,
943
- "loss": 0.0129,
944
- "step": 3775
945
- },
946
- {
947
- "epoch": 5.21,
948
- "learning_rate": 1e-05,
949
- "loss": 0.0141,
950
- "step": 3800
951
- },
952
- {
953
- "epoch": 5.25,
954
- "learning_rate": 1e-05,
955
- "loss": 0.0144,
956
- "step": 3825
957
- },
958
- {
959
- "epoch": 5.28,
960
- "learning_rate": 1e-05,
961
- "loss": 0.0135,
962
- "step": 3850
963
- },
964
- {
965
- "epoch": 5.32,
966
- "learning_rate": 1e-05,
967
- "loss": 0.0185,
968
- "step": 3875
969
- },
970
- {
971
- "epoch": 5.35,
972
- "learning_rate": 1e-05,
973
- "loss": 0.0137,
974
- "step": 3900
975
- },
976
- {
977
- "epoch": 5.38,
978
- "learning_rate": 1e-05,
979
- "loss": 0.0129,
980
- "step": 3925
981
- },
982
- {
983
- "epoch": 5.42,
984
- "learning_rate": 1e-05,
985
- "loss": 0.015,
986
- "step": 3950
987
- },
988
- {
989
- "epoch": 5.45,
990
- "learning_rate": 1e-05,
991
- "loss": 0.0128,
992
- "step": 3975
993
- },
994
- {
995
- "epoch": 5.49,
996
- "learning_rate": 1e-05,
997
- "loss": 0.0199,
998
- "step": 4000
999
- },
1000
- {
1001
- "epoch": 5.49,
1002
- "eval_cer": 7.027554850324982,
1003
- "eval_loss": 0.25804632902145386,
1004
- "eval_runtime": 394.0647,
1005
- "eval_samples_per_second": 1.929,
1006
- "eval_steps_per_second": 0.122,
1007
- "eval_wer": 17.63791763791764,
1008
- "step": 4000
1009
- }
1010
- ],
1011
- "max_steps": 4000,
1012
- "num_train_epochs": 6,
1013
- "total_flos": 1.84636939124736e+19,
1014
- "trial_name": null,
1015
- "trial_params": null
1016
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:183b3688d5af801606a749373eb08d97abd31836266567ef26bc14ac7144c015
3
- size 4155
 
 
 
 
runs/Aug20_17-07-13_713cba38a708/events.out.tfevents.1692551245.713cba38a708.95.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87cfbca740150fa86fac7e75b3663363867462c93653210ab71be000e23ad764
3
- size 32409
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9850ec2206f43a150a39e714ff744f011e62ab39f712c7ae579fca47f3526bd4
3
+ size 32763