csikasote commited on
Commit
189d715
1 Parent(s): 8725f91

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -3
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +419 -0
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: mit
4
  base_model: facebook/w2v-bert-2.0
5
  tags:
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # w2v-bert-bem-bembaspeech-model
18
 
19
- This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2748
22
- - Wer: 0.5324
23
 
24
  ## Model description
25
 
 
3
  license: mit
4
  base_model: facebook/w2v-bert-2.0
5
  tags:
6
+ - automatic-speech-recognition
7
+ - BembaSpeech
8
  - generated_from_trainer
9
  metrics:
10
  - wer
 
18
 
19
  # w2v-bert-bem-bembaspeech-model
20
 
21
+ This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the BEMBASPEECH - BEM dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.2620
24
+ - Wer: 0.5353
25
 
26
  ## Model description
27
 
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.497540407589599,
3
+ "eval_loss": 0.2620340585708618,
4
+ "eval_runtime": 55.5245,
5
+ "eval_samples": 1492,
6
+ "eval_samples_per_second": 26.871,
7
+ "eval_steps_per_second": 3.368,
8
+ "eval_wer": 0.5353404273726101,
9
+ "total_flos": 9.439925733716597e+18,
10
+ "train_loss": 0.3939369261264801,
11
+ "train_runtime": 5287.5609,
12
+ "train_samples": 11377,
13
+ "train_samples_per_second": 64.55,
14
+ "train_steps_per_second": 4.034
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.497540407589599,
3
+ "eval_loss": 0.2620340585708618,
4
+ "eval_runtime": 55.5245,
5
+ "eval_samples": 1492,
6
+ "eval_samples_per_second": 26.871,
7
+ "eval_steps_per_second": 3.368,
8
+ "eval_wer": 0.5353404273726101
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.497540407589599,
3
+ "total_flos": 9.439925733716597e+18,
4
+ "train_loss": 0.3939369261264801,
5
+ "train_runtime": 5287.5609,
6
+ "train_samples": 11377,
7
+ "train_samples_per_second": 64.55,
8
+ "train_steps_per_second": 4.034
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2620340585708618,
3
+ "best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-bembaspeech-model/checkpoint-2600",
4
+ "epoch": 4.497540407589599,
5
+ "eval_steps": 200,
6
+ "global_step": 3200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14054813773717498,
13
+ "grad_norm": 3.1719706058502197,
14
+ "learning_rate": 0.00029699999999999996,
15
+ "loss": 1.5404,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.28109627547434995,
20
+ "grad_norm": 2.3361048698425293,
21
+ "learning_rate": 0.00029860103626943,
22
+ "loss": 0.7066,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.28109627547434995,
27
+ "eval_loss": 0.5066322684288025,
28
+ "eval_runtime": 58.7073,
29
+ "eval_samples_per_second": 25.414,
30
+ "eval_steps_per_second": 3.185,
31
+ "eval_wer": 0.7647720391037287,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.42164441321152496,
36
+ "grad_norm": 2.280653476715088,
37
+ "learning_rate": 0.0002971879415920866,
38
+ "loss": 0.5841,
39
+ "step": 300
40
+ },
41
+ {
42
+ "epoch": 0.5621925509486999,
43
+ "grad_norm": 3.3147284984588623,
44
+ "learning_rate": 0.00029577484691474326,
45
+ "loss": 0.5538,
46
+ "step": 400
47
+ },
48
+ {
49
+ "epoch": 0.5621925509486999,
50
+ "eval_loss": 0.4313148260116577,
51
+ "eval_runtime": 57.0018,
52
+ "eval_samples_per_second": 26.175,
53
+ "eval_steps_per_second": 3.281,
54
+ "eval_wer": 0.7232459555324855,
55
+ "step": 400
56
+ },
57
+ {
58
+ "epoch": 0.7027406886858749,
59
+ "grad_norm": 2.074723243713379,
60
+ "learning_rate": 0.00029436175223739985,
61
+ "loss": 0.5065,
62
+ "step": 500
63
+ },
64
+ {
65
+ "epoch": 0.8432888264230499,
66
+ "grad_norm": 1.6516566276550293,
67
+ "learning_rate": 0.0002929486575600565,
68
+ "loss": 0.4574,
69
+ "step": 600
70
+ },
71
+ {
72
+ "epoch": 0.8432888264230499,
73
+ "eval_loss": 0.41018426418304443,
74
+ "eval_runtime": 57.3352,
75
+ "eval_samples_per_second": 26.022,
76
+ "eval_steps_per_second": 3.262,
77
+ "eval_wer": 0.6956484124924301,
78
+ "step": 600
79
+ },
80
+ {
81
+ "epoch": 0.9838369641602249,
82
+ "grad_norm": 2.077942132949829,
83
+ "learning_rate": 0.0002915355628827131,
84
+ "loss": 0.4548,
85
+ "step": 700
86
+ },
87
+ {
88
+ "epoch": 1.1243851018973998,
89
+ "grad_norm": 3.208420753479004,
90
+ "learning_rate": 0.00029012246820536974,
91
+ "loss": 0.4084,
92
+ "step": 800
93
+ },
94
+ {
95
+ "epoch": 1.1243851018973998,
96
+ "eval_loss": 0.3528524935245514,
97
+ "eval_runtime": 57.2723,
98
+ "eval_samples_per_second": 26.051,
99
+ "eval_steps_per_second": 3.265,
100
+ "eval_wer": 0.6276494506445194,
101
+ "step": 800
102
+ },
103
+ {
104
+ "epoch": 1.264933239634575,
105
+ "grad_norm": 2.2176339626312256,
106
+ "learning_rate": 0.00028870937352802633,
107
+ "loss": 0.4086,
108
+ "step": 900
109
+ },
110
+ {
111
+ "epoch": 1.4054813773717498,
112
+ "grad_norm": 1.1724199056625366,
113
+ "learning_rate": 0.000287296278850683,
114
+ "loss": 0.388,
115
+ "step": 1000
116
+ },
117
+ {
118
+ "epoch": 1.4054813773717498,
119
+ "eval_loss": 0.3004082143306732,
120
+ "eval_runtime": 56.7843,
121
+ "eval_samples_per_second": 26.275,
122
+ "eval_steps_per_second": 3.293,
123
+ "eval_wer": 0.5723678518903019,
124
+ "step": 1000
125
+ },
126
+ {
127
+ "epoch": 1.5460295151089247,
128
+ "grad_norm": 1.3748365640640259,
129
+ "learning_rate": 0.00028588318417333957,
130
+ "loss": 0.3791,
131
+ "step": 1100
132
+ },
133
+ {
134
+ "epoch": 1.6865776528460998,
135
+ "grad_norm": 1.1393488645553589,
136
+ "learning_rate": 0.0002844700894959962,
137
+ "loss": 0.3803,
138
+ "step": 1200
139
+ },
140
+ {
141
+ "epoch": 1.6865776528460998,
142
+ "eval_loss": 0.3376190662384033,
143
+ "eval_runtime": 56.8427,
144
+ "eval_samples_per_second": 26.248,
145
+ "eval_steps_per_second": 3.29,
146
+ "eval_wer": 0.647720391037287,
147
+ "step": 1200
148
+ },
149
+ {
150
+ "epoch": 1.8271257905832747,
151
+ "grad_norm": 2.872753143310547,
152
+ "learning_rate": 0.0002830569948186528,
153
+ "loss": 0.3644,
154
+ "step": 1300
155
+ },
156
+ {
157
+ "epoch": 1.9676739283204498,
158
+ "grad_norm": 0.8893818855285645,
159
+ "learning_rate": 0.00028164390014130946,
160
+ "loss": 0.367,
161
+ "step": 1400
162
+ },
163
+ {
164
+ "epoch": 1.9676739283204498,
165
+ "eval_loss": 0.29106405377388,
166
+ "eval_runtime": 56.9876,
167
+ "eval_samples_per_second": 26.181,
168
+ "eval_steps_per_second": 3.281,
169
+ "eval_wer": 0.5801539925599101,
170
+ "step": 1400
171
+ },
172
+ {
173
+ "epoch": 2.1082220660576247,
174
+ "grad_norm": 0.9697826504707336,
175
+ "learning_rate": 0.00028023080546396605,
176
+ "loss": 0.3335,
177
+ "step": 1500
178
+ },
179
+ {
180
+ "epoch": 2.2487702037947996,
181
+ "grad_norm": 1.1089781522750854,
182
+ "learning_rate": 0.0002788177107866227,
183
+ "loss": 0.3168,
184
+ "step": 1600
185
+ },
186
+ {
187
+ "epoch": 2.2487702037947996,
188
+ "eval_loss": 0.31056511402130127,
189
+ "eval_runtime": 57.0385,
190
+ "eval_samples_per_second": 26.158,
191
+ "eval_steps_per_second": 3.278,
192
+ "eval_wer": 0.5724543645644087,
193
+ "step": 1600
194
+ },
195
+ {
196
+ "epoch": 2.3893183415319745,
197
+ "grad_norm": 0.8601323962211609,
198
+ "learning_rate": 0.0002774046161092793,
199
+ "loss": 0.3268,
200
+ "step": 1700
201
+ },
202
+ {
203
+ "epoch": 2.52986647926915,
204
+ "grad_norm": 0.7583444714546204,
205
+ "learning_rate": 0.00027599152143193594,
206
+ "loss": 0.3227,
207
+ "step": 1800
208
+ },
209
+ {
210
+ "epoch": 2.52986647926915,
211
+ "eval_loss": 0.26542848348617554,
212
+ "eval_runtime": 57.1567,
213
+ "eval_samples_per_second": 26.104,
214
+ "eval_steps_per_second": 3.272,
215
+ "eval_wer": 0.5348213513279696,
216
+ "step": 1800
217
+ },
218
+ {
219
+ "epoch": 2.6704146170063248,
220
+ "grad_norm": 1.2712171077728271,
221
+ "learning_rate": 0.00027457842675459253,
222
+ "loss": 0.3081,
223
+ "step": 1900
224
+ },
225
+ {
226
+ "epoch": 2.8109627547434997,
227
+ "grad_norm": 0.7406817674636841,
228
+ "learning_rate": 0.0002731653320772492,
229
+ "loss": 0.3111,
230
+ "step": 2000
231
+ },
232
+ {
233
+ "epoch": 2.8109627547434997,
234
+ "eval_loss": 0.26211297512054443,
235
+ "eval_runtime": 56.777,
236
+ "eval_samples_per_second": 26.278,
237
+ "eval_steps_per_second": 3.294,
238
+ "eval_wer": 0.5494419932520114,
239
+ "step": 2000
240
+ },
241
+ {
242
+ "epoch": 2.9515108924806746,
243
+ "grad_norm": 1.053320050239563,
244
+ "learning_rate": 0.0002717522373999058,
245
+ "loss": 0.3181,
246
+ "step": 2100
247
+ },
248
+ {
249
+ "epoch": 3.0920590302178494,
250
+ "grad_norm": 0.7663154006004333,
251
+ "learning_rate": 0.00027033914272256237,
252
+ "loss": 0.2823,
253
+ "step": 2200
254
+ },
255
+ {
256
+ "epoch": 3.0920590302178494,
257
+ "eval_loss": 0.2665364444255829,
258
+ "eval_runtime": 56.8072,
259
+ "eval_samples_per_second": 26.264,
260
+ "eval_steps_per_second": 3.292,
261
+ "eval_wer": 0.5421749286270439,
262
+ "step": 2200
263
+ },
264
+ {
265
+ "epoch": 3.232607167955025,
266
+ "grad_norm": 0.5670559406280518,
267
+ "learning_rate": 0.000268926048045219,
268
+ "loss": 0.2593,
269
+ "step": 2300
270
+ },
271
+ {
272
+ "epoch": 3.3731553056921997,
273
+ "grad_norm": 0.800305187702179,
274
+ "learning_rate": 0.0002675129533678756,
275
+ "loss": 0.2603,
276
+ "step": 2400
277
+ },
278
+ {
279
+ "epoch": 3.3731553056921997,
280
+ "eval_loss": 0.26233434677124023,
281
+ "eval_runtime": 56.2939,
282
+ "eval_samples_per_second": 26.504,
283
+ "eval_steps_per_second": 3.322,
284
+ "eval_wer": 0.5174323038325115,
285
+ "step": 2400
286
+ },
287
+ {
288
+ "epoch": 3.5137034434293746,
289
+ "grad_norm": 0.581555187702179,
290
+ "learning_rate": 0.00026609985869053225,
291
+ "loss": 0.2835,
292
+ "step": 2500
293
+ },
294
+ {
295
+ "epoch": 3.6542515811665495,
296
+ "grad_norm": 0.5515788793563843,
297
+ "learning_rate": 0.00026468676401318885,
298
+ "loss": 0.2735,
299
+ "step": 2600
300
+ },
301
+ {
302
+ "epoch": 3.6542515811665495,
303
+ "eval_loss": 0.2620340585708618,
304
+ "eval_runtime": 57.2283,
305
+ "eval_samples_per_second": 26.071,
306
+ "eval_steps_per_second": 3.268,
307
+ "eval_wer": 0.5353404273726101,
308
+ "step": 2600
309
+ },
310
+ {
311
+ "epoch": 3.7947997189037244,
312
+ "grad_norm": 0.6277577877044678,
313
+ "learning_rate": 0.0002632736693358455,
314
+ "loss": 0.2806,
315
+ "step": 2700
316
+ },
317
+ {
318
+ "epoch": 3.9353478566408997,
319
+ "grad_norm": 0.5215665698051453,
320
+ "learning_rate": 0.0002618605746585021,
321
+ "loss": 0.2666,
322
+ "step": 2800
323
+ },
324
+ {
325
+ "epoch": 3.9353478566408997,
326
+ "eval_loss": 0.2752685844898224,
327
+ "eval_runtime": 57.0665,
328
+ "eval_samples_per_second": 26.145,
329
+ "eval_steps_per_second": 3.277,
330
+ "eval_wer": 0.5450298468725668,
331
+ "step": 2800
332
+ },
333
+ {
334
+ "epoch": 4.075895994378074,
335
+ "grad_norm": 0.745657205581665,
336
+ "learning_rate": 0.0002604474799811587,
337
+ "loss": 0.2479,
338
+ "step": 2900
339
+ },
340
+ {
341
+ "epoch": 4.2164441321152495,
342
+ "grad_norm": 0.44574543833732605,
343
+ "learning_rate": 0.00025903438530381533,
344
+ "loss": 0.2248,
345
+ "step": 3000
346
+ },
347
+ {
348
+ "epoch": 4.2164441321152495,
349
+ "eval_loss": 0.28806060552597046,
350
+ "eval_runtime": 56.7213,
351
+ "eval_samples_per_second": 26.304,
352
+ "eval_steps_per_second": 3.297,
353
+ "eval_wer": 0.5817977333679384,
354
+ "step": 3000
355
+ },
356
+ {
357
+ "epoch": 4.356992269852425,
358
+ "grad_norm": 0.690984845161438,
359
+ "learning_rate": 0.0002576212906264719,
360
+ "loss": 0.2502,
361
+ "step": 3100
362
+ },
363
+ {
364
+ "epoch": 4.497540407589599,
365
+ "grad_norm": 0.47614070773124695,
366
+ "learning_rate": 0.00025620819594912857,
367
+ "loss": 0.2408,
368
+ "step": 3200
369
+ },
370
+ {
371
+ "epoch": 4.497540407589599,
372
+ "eval_loss": 0.2748269736766815,
373
+ "eval_runtime": 57.1007,
374
+ "eval_samples_per_second": 26.129,
375
+ "eval_steps_per_second": 3.275,
376
+ "eval_wer": 0.5323989964529804,
377
+ "step": 3200
378
+ },
379
+ {
380
+ "epoch": 4.497540407589599,
381
+ "step": 3200,
382
+ "total_flos": 9.439925733716597e+18,
383
+ "train_loss": 0.3939369261264801,
384
+ "train_runtime": 5287.5609,
385
+ "train_samples_per_second": 64.55,
386
+ "train_steps_per_second": 4.034
387
+ }
388
+ ],
389
+ "logging_steps": 100,
390
+ "max_steps": 21330,
391
+ "num_input_tokens_seen": 0,
392
+ "num_train_epochs": 30,
393
+ "save_steps": 200,
394
+ "stateful_callbacks": {
395
+ "EarlyStoppingCallback": {
396
+ "args": {
397
+ "early_stopping_patience": 3,
398
+ "early_stopping_threshold": 0.0
399
+ },
400
+ "attributes": {
401
+ "early_stopping_patience_counter": 3
402
+ }
403
+ },
404
+ "TrainerControl": {
405
+ "args": {
406
+ "should_epoch_stop": false,
407
+ "should_evaluate": false,
408
+ "should_log": false,
409
+ "should_save": true,
410
+ "should_training_stop": true
411
+ },
412
+ "attributes": {}
413
+ }
414
+ },
415
+ "total_flos": 9.439925733716597e+18,
416
+ "train_batch_size": 8,
417
+ "trial_name": null,
418
+ "trial_params": null
419
+ }