DewiBrynJones commited on
Commit
9344f43
1 Parent(s): 82b06da

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: inf
21
  - Wer: 0.3289
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: inf
23
  - Wer: 0.3289
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 1.5444015444015444,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 179.4638,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 39.128,
7
- "eval_steps_per_second": 4.892,
8
- "eval_wer": 0.3500967369145469,
9
- "total_flos": 8.411238439350073e+18,
10
- "train_loss": 1.0689988899230958,
11
- "train_runtime": 5961.6242,
12
  "train_samples": 41435,
13
- "train_samples_per_second": 10.735,
14
- "train_steps_per_second": 0.671
15
  }
 
1
  {
2
+ "epoch": 2.3166023166023164,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 182.873,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 38.398,
7
+ "eval_steps_per_second": 4.801,
8
+ "eval_wer": 0.3289055094594419,
9
+ "total_flos": 1.261526897313927e+19,
10
+ "train_loss": 0.8721037826538086,
11
+ "train_runtime": 9008.7975,
12
  "train_samples": 41435,
13
+ "train_samples_per_second": 10.656,
14
+ "train_steps_per_second": 0.666
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.5444015444015444,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 179.4638,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 39.128,
7
- "eval_steps_per_second": 4.892,
8
- "eval_wer": 0.3500967369145469
9
  }
 
1
  {
2
+ "epoch": 2.3166023166023164,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 182.873,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 38.398,
7
+ "eval_steps_per_second": 4.801,
8
+ "eval_wer": 0.3289055094594419
9
  }
runs/Aug12_13-56-04_940cfb8ec62d/events.out.tfevents.1723476624.940cfb8ec62d.1583.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed22e8ff2cd1ec9d2ef03daff93297dd4f5055c7d749ed58bc729ce23be64ef
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.5444015444015444,
3
- "total_flos": 8.411238439350073e+18,
4
- "train_loss": 1.0689988899230958,
5
- "train_runtime": 5961.6242,
6
  "train_samples": 41435,
7
- "train_samples_per_second": 10.735,
8
- "train_steps_per_second": 0.671
9
  }
 
1
  {
2
+ "epoch": 2.3166023166023164,
3
+ "total_flos": 1.261526897313927e+19,
4
+ "train_loss": 0.8721037826538086,
5
+ "train_runtime": 9008.7975,
6
  "train_samples": 41435,
7
+ "train_samples_per_second": 10.656,
8
+ "train_steps_per_second": 0.666
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.5444015444015444,
5
  "eval_steps": 200,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,253 +11,371 @@
11
  {
12
  "epoch": 0.07722007722007722,
13
  "eval_loss": Infinity,
14
- "eval_runtime": 178.8025,
15
- "eval_samples_per_second": 39.272,
16
- "eval_steps_per_second": 4.91,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
  "epoch": 0.15444015444015444,
22
  "eval_loss": Infinity,
23
- "eval_runtime": 176.9522,
24
- "eval_samples_per_second": 39.683,
25
- "eval_steps_per_second": 4.962,
26
- "eval_wer": 0.9261812940672354,
27
  "step": 400
28
  },
29
  {
30
  "epoch": 0.19305019305019305,
31
- "grad_norm": 3.265190362930298,
32
  "learning_rate": 0.00029699999999999996,
33
- "loss": 3.9094,
34
  "step": 500
35
  },
36
  {
37
  "epoch": 0.23166023166023167,
38
  "eval_loss": Infinity,
39
- "eval_runtime": 177.1009,
40
- "eval_samples_per_second": 39.65,
41
- "eval_steps_per_second": 4.958,
42
- "eval_wer": 0.7767000376559802,
43
  "step": 600
44
  },
45
  {
46
  "epoch": 0.3088803088803089,
47
  "eval_loss": Infinity,
48
- "eval_runtime": 177.4675,
49
- "eval_samples_per_second": 39.568,
50
- "eval_steps_per_second": 4.947,
51
- "eval_wer": 0.7365379870930882,
52
  "step": 800
53
  },
54
  {
55
  "epoch": 0.3861003861003861,
56
- "grad_norm": 3.0774288177490234,
57
- "learning_rate": 0.00025765714285714284,
58
- "loss": 0.9885,
59
  "step": 1000
60
  },
61
  {
62
  "epoch": 0.3861003861003861,
63
  "eval_loss": Infinity,
64
- "eval_runtime": 177.9576,
65
- "eval_samples_per_second": 39.459,
66
- "eval_steps_per_second": 4.934,
67
- "eval_wer": 0.6104813473050005,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 0.46332046332046334,
72
  "eval_loss": Infinity,
73
- "eval_runtime": 176.7839,
74
- "eval_samples_per_second": 39.721,
75
- "eval_steps_per_second": 4.967,
76
- "eval_wer": 0.5430511731785542,
77
  "step": 1200
78
  },
79
  {
80
  "epoch": 0.5405405405405406,
81
  "eval_loss": Infinity,
82
- "eval_runtime": 177.8714,
83
- "eval_samples_per_second": 39.478,
84
- "eval_steps_per_second": 4.936,
85
- "eval_wer": 0.5144845675405451,
86
  "step": 1400
87
  },
88
  {
89
  "epoch": 0.5791505791505791,
90
- "grad_norm": 3.033381700515747,
91
- "learning_rate": 0.00021488571428571426,
92
- "loss": 0.7974,
93
  "step": 1500
94
  },
95
  {
96
  "epoch": 0.6177606177606177,
97
  "eval_loss": Infinity,
98
- "eval_runtime": 178.2105,
99
- "eval_samples_per_second": 39.403,
100
- "eval_steps_per_second": 4.927,
101
- "eval_wer": 0.5230675340526924,
102
  "step": 1600
103
  },
104
  {
105
  "epoch": 0.694980694980695,
106
  "eval_loss": Infinity,
107
- "eval_runtime": 178.6698,
108
- "eval_samples_per_second": 39.302,
109
- "eval_steps_per_second": 4.914,
110
- "eval_wer": 0.46214275511926556,
111
  "step": 1800
112
  },
113
  {
114
  "epoch": 0.7722007722007722,
115
- "grad_norm": 3.2283124923706055,
116
- "learning_rate": 0.0001721142857142857,
117
- "loss": 0.7068,
118
  "step": 2000
119
  },
120
  {
121
  "epoch": 0.7722007722007722,
122
  "eval_loss": Infinity,
123
- "eval_runtime": 179.2886,
124
- "eval_samples_per_second": 39.166,
125
- "eval_steps_per_second": 4.897,
126
- "eval_wer": 0.4532481529092491,
127
  "step": 2000
128
  },
129
  {
130
  "epoch": 0.8494208494208494,
131
  "eval_loss": Infinity,
132
- "eval_runtime": 179.5146,
133
- "eval_samples_per_second": 39.117,
134
- "eval_steps_per_second": 4.891,
135
- "eval_wer": 0.4404061651928895,
136
  "step": 2200
137
  },
138
  {
139
  "epoch": 0.9266409266409267,
140
  "eval_loss": Infinity,
141
- "eval_runtime": 178.4125,
142
- "eval_samples_per_second": 39.358,
143
- "eval_steps_per_second": 4.921,
144
- "eval_wer": 0.4195006037941646,
145
  "step": 2400
146
  },
147
  {
148
  "epoch": 0.9652509652509652,
149
- "grad_norm": 1.9969470500946045,
150
- "learning_rate": 0.00012925714285714286,
151
- "loss": 0.6447,
152
  "step": 2500
153
  },
154
  {
155
  "epoch": 1.0038610038610039,
156
  "eval_loss": Infinity,
157
- "eval_runtime": 179.4629,
158
- "eval_samples_per_second": 39.128,
159
- "eval_steps_per_second": 4.892,
160
- "eval_wer": 0.4115928479607339,
161
  "step": 2600
162
  },
163
  {
164
  "epoch": 1.0810810810810811,
165
  "eval_loss": Infinity,
166
- "eval_runtime": 182.2946,
167
- "eval_samples_per_second": 38.52,
168
- "eval_steps_per_second": 4.816,
169
- "eval_wer": 0.38897329022372845,
170
  "step": 2800
171
  },
172
  {
173
  "epoch": 1.1583011583011582,
174
- "grad_norm": 0.5247506499290466,
175
- "learning_rate": 8.648571428571429e-05,
176
- "loss": 0.5318,
177
  "step": 3000
178
  },
179
  {
180
  "epoch": 1.1583011583011582,
181
  "eval_loss": Infinity,
182
- "eval_runtime": 180.6608,
183
- "eval_samples_per_second": 38.868,
184
- "eval_steps_per_second": 4.86,
185
- "eval_wer": 0.38040330853232573,
186
  "step": 3000
187
  },
188
  {
189
  "epoch": 1.2355212355212355,
190
  "eval_loss": Infinity,
191
- "eval_runtime": 178.267,
192
- "eval_samples_per_second": 39.39,
193
- "eval_steps_per_second": 4.925,
194
- "eval_wer": 0.37341747497175803,
195
  "step": 3200
196
  },
197
  {
198
  "epoch": 1.3127413127413128,
199
  "eval_loss": Infinity,
200
- "eval_runtime": 179.0534,
201
- "eval_samples_per_second": 39.217,
202
- "eval_steps_per_second": 4.904,
203
- "eval_wer": 0.36753535117447705,
204
  "step": 3400
205
  },
206
  {
207
  "epoch": 1.3513513513513513,
208
- "grad_norm": 0.6272704005241394,
209
- "learning_rate": 4.3799999999999994e-05,
210
- "loss": 0.494,
211
  "step": 3500
212
  },
213
  {
214
  "epoch": 1.3899613899613898,
215
  "eval_loss": Infinity,
216
- "eval_runtime": 180.0451,
217
- "eval_samples_per_second": 39.001,
218
- "eval_steps_per_second": 4.877,
219
- "eval_wer": 0.3590173087660525,
220
  "step": 3600
221
  },
222
  {
223
  "epoch": 1.4671814671814671,
224
  "eval_loss": Infinity,
225
- "eval_runtime": 179.7417,
226
- "eval_samples_per_second": 39.067,
227
- "eval_steps_per_second": 4.885,
228
- "eval_wer": 0.35345980548738526,
229
  "step": 3800
230
  },
231
  {
232
  "epoch": 1.5444015444015444,
233
- "grad_norm": 0.7731852531433105,
234
- "learning_rate": 1.0285714285714284e-06,
235
- "loss": 0.4794,
236
  "step": 4000
237
  },
238
  {
239
  "epoch": 1.5444015444015444,
240
  "eval_loss": Infinity,
241
- "eval_runtime": 179.8748,
242
- "eval_samples_per_second": 39.038,
243
- "eval_steps_per_second": 4.881,
244
- "eval_wer": 0.3500967369145469,
245
  "step": 4000
246
  },
247
  {
248
- "epoch": 1.5444015444015444,
249
- "step": 4000,
250
- "total_flos": 8.411238439350073e+18,
251
- "train_loss": 1.0689988899230958,
252
- "train_runtime": 5961.6242,
253
- "train_samples_per_second": 10.735,
254
- "train_steps_per_second": 0.671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  }
256
  ],
257
  "logging_steps": 500,
258
- "max_steps": 4000,
259
  "num_input_tokens_seen": 0,
260
- "num_train_epochs": 2,
261
  "save_steps": 500,
262
  "stateful_callbacks": {
263
  "TrainerControl": {
@@ -271,7 +389,7 @@
271
  "attributes": {}
272
  }
273
  },
274
- "total_flos": 8.411238439350073e+18,
275
  "train_batch_size": 16,
276
  "trial_name": null,
277
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3166023166023164,
5
  "eval_steps": 200,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.07722007722007722,
13
  "eval_loss": Infinity,
14
+ "eval_runtime": 199.2963,
15
+ "eval_samples_per_second": 35.234,
16
+ "eval_steps_per_second": 4.406,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
  "epoch": 0.15444015444015444,
22
  "eval_loss": Infinity,
23
+ "eval_runtime": 187.0234,
24
+ "eval_samples_per_second": 37.546,
25
+ "eval_steps_per_second": 4.695,
26
+ "eval_wer": 0.9937672860426162,
27
  "step": 400
28
  },
29
  {
30
  "epoch": 0.19305019305019305,
31
+ "grad_norm": 4.210392951965332,
32
  "learning_rate": 0.00029699999999999996,
33
+ "loss": 3.9317,
34
  "step": 500
35
  },
36
  {
37
  "epoch": 0.23166023166023167,
38
  "eval_loss": Infinity,
39
+ "eval_runtime": 180.1705,
40
+ "eval_samples_per_second": 38.974,
41
+ "eval_steps_per_second": 4.873,
42
+ "eval_wer": 0.7576253359822368,
43
  "step": 600
44
  },
45
  {
46
  "epoch": 0.3088803088803089,
47
  "eval_loss": Infinity,
48
+ "eval_runtime": 179.0087,
49
+ "eval_samples_per_second": 39.227,
50
+ "eval_steps_per_second": 4.905,
51
+ "eval_wer": 0.6941685170036228,
52
  "step": 800
53
  },
54
  {
55
  "epoch": 0.3861003861003861,
56
+ "grad_norm": 3.149083137512207,
57
+ "learning_rate": 0.0002730545454545454,
58
+ "loss": 0.9699,
59
  "step": 1000
60
  },
61
  {
62
  "epoch": 0.3861003861003861,
63
  "eval_loss": Infinity,
64
+ "eval_runtime": 179.6063,
65
+ "eval_samples_per_second": 39.097,
66
+ "eval_steps_per_second": 4.888,
67
+ "eval_wer": 0.5762923142846013,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 0.46332046332046334,
72
  "eval_loss": Infinity,
73
+ "eval_runtime": 180.175,
74
+ "eval_samples_per_second": 38.973,
75
+ "eval_steps_per_second": 4.873,
76
+ "eval_wer": 0.5518548816433589,
77
  "step": 1200
78
  },
79
  {
80
  "epoch": 0.5405405405405406,
81
  "eval_loss": Infinity,
82
+ "eval_runtime": 180.7382,
83
+ "eval_samples_per_second": 38.852,
84
+ "eval_steps_per_second": 4.858,
85
+ "eval_wer": 0.5173931673873242,
86
  "step": 1400
87
  },
88
  {
89
  "epoch": 0.5791505791505791,
90
+ "grad_norm": 4.50093412399292,
91
+ "learning_rate": 0.0002458363636363636,
92
+ "loss": 0.8031,
93
  "step": 1500
94
  },
95
  {
96
  "epoch": 0.6177606177606177,
97
  "eval_loss": Infinity,
98
+ "eval_runtime": 180.2438,
99
+ "eval_samples_per_second": 38.958,
100
+ "eval_steps_per_second": 4.871,
101
+ "eval_wer": 0.5338059808084349,
102
  "step": 1600
103
  },
104
  {
105
  "epoch": 0.694980694980695,
106
  "eval_loss": Infinity,
107
+ "eval_runtime": 180.7932,
108
+ "eval_samples_per_second": 38.84,
109
+ "eval_steps_per_second": 4.856,
110
+ "eval_wer": 0.47772454001272513,
111
  "step": 1800
112
  },
113
  {
114
  "epoch": 0.7722007722007722,
115
+ "grad_norm": 3.7705626487731934,
116
+ "learning_rate": 0.0002186181818181818,
117
+ "loss": 0.7169,
118
  "step": 2000
119
  },
120
  {
121
  "epoch": 0.7722007722007722,
122
  "eval_loss": Infinity,
123
+ "eval_runtime": 179.9877,
124
+ "eval_samples_per_second": 39.014,
125
+ "eval_steps_per_second": 4.878,
126
+ "eval_wer": 0.45044343162842637,
127
  "step": 2000
128
  },
129
  {
130
  "epoch": 0.8494208494208494,
131
  "eval_loss": Infinity,
132
+ "eval_runtime": 182.9186,
133
+ "eval_samples_per_second": 38.389,
134
+ "eval_steps_per_second": 4.8,
135
+ "eval_wer": 0.4499500084401335,
136
  "step": 2200
137
  },
138
  {
139
  "epoch": 0.9266409266409267,
140
  "eval_loss": Infinity,
141
+ "eval_runtime": 180.2372,
142
+ "eval_samples_per_second": 38.96,
143
+ "eval_steps_per_second": 4.871,
144
+ "eval_wer": 0.4431719320114786,
145
  "step": 2400
146
  },
147
  {
148
  "epoch": 0.9652509652509652,
149
+ "grad_norm": 3.5582635402679443,
150
+ "learning_rate": 0.00019134545454545454,
151
+ "loss": 0.6687,
152
  "step": 2500
153
  },
154
  {
155
  "epoch": 1.0038610038610039,
156
  "eval_loss": Infinity,
157
+ "eval_runtime": 180.3998,
158
+ "eval_samples_per_second": 38.925,
159
+ "eval_steps_per_second": 4.867,
160
+ "eval_wer": 0.4175658655032267,
161
  "step": 2600
162
  },
163
  {
164
  "epoch": 1.0810810810810811,
165
  "eval_loss": Infinity,
166
+ "eval_runtime": 180.1365,
167
+ "eval_samples_per_second": 38.982,
168
+ "eval_steps_per_second": 4.874,
169
+ "eval_wer": 0.40537311882409466,
170
  "step": 2800
171
  },
172
  {
173
  "epoch": 1.1583011583011582,
174
+ "grad_norm": 0.5179678201675415,
175
+ "learning_rate": 0.0001641272727272727,
176
+ "loss": 0.5609,
177
  "step": 3000
178
  },
179
  {
180
  "epoch": 1.1583011583011582,
181
  "eval_loss": Infinity,
182
+ "eval_runtime": 180.8398,
183
+ "eval_samples_per_second": 38.83,
184
+ "eval_steps_per_second": 4.855,
185
+ "eval_wer": 0.4009193253087141,
186
  "step": 3000
187
  },
188
  {
189
  "epoch": 1.2355212355212355,
190
  "eval_loss": Infinity,
191
+ "eval_runtime": 181.6098,
192
+ "eval_samples_per_second": 38.665,
193
+ "eval_steps_per_second": 4.835,
194
+ "eval_wer": 0.4022567618454027,
195
  "step": 3200
196
  },
197
  {
198
  "epoch": 1.3127413127413128,
199
  "eval_loss": Infinity,
200
+ "eval_runtime": 182.1259,
201
+ "eval_samples_per_second": 38.556,
202
+ "eval_steps_per_second": 4.821,
203
+ "eval_wer": 0.39188189007050755,
204
  "step": 3400
205
  },
206
  {
207
  "epoch": 1.3513513513513513,
208
+ "grad_norm": 0.5364826321601868,
209
+ "learning_rate": 0.00013696363636363636,
210
+ "loss": 0.5324,
211
  "step": 3500
212
  },
213
  {
214
  "epoch": 1.3899613899613898,
215
  "eval_loss": Infinity,
216
+ "eval_runtime": 181.3225,
217
+ "eval_samples_per_second": 38.727,
218
+ "eval_steps_per_second": 4.842,
219
+ "eval_wer": 0.3794813862594627,
220
  "step": 3600
221
  },
222
  {
223
  "epoch": 1.4671814671814671,
224
  "eval_loss": Infinity,
225
+ "eval_runtime": 181.5845,
226
+ "eval_samples_per_second": 38.671,
227
+ "eval_steps_per_second": 4.835,
228
+ "eval_wer": 0.37523534987599494,
229
  "step": 3800
230
  },
231
  {
232
  "epoch": 1.5444015444015444,
233
+ "grad_norm": 1.197091817855835,
234
+ "learning_rate": 0.00010974545454545454,
235
+ "loss": 0.5196,
236
  "step": 4000
237
  },
238
  {
239
  "epoch": 1.5444015444015444,
240
  "eval_loss": Infinity,
241
+ "eval_runtime": 181.3235,
242
+ "eval_samples_per_second": 38.726,
243
+ "eval_steps_per_second": 4.842,
244
+ "eval_wer": 0.36617194499629935,
245
  "step": 4000
246
  },
247
  {
248
+ "epoch": 1.6216216216216215,
249
+ "eval_loss": Infinity,
250
+ "eval_runtime": 181.9653,
251
+ "eval_samples_per_second": 38.59,
252
+ "eval_steps_per_second": 4.825,
253
+ "eval_wer": 0.37034007245529976,
254
+ "step": 4200
255
+ },
256
+ {
257
+ "epoch": 1.698841698841699,
258
+ "eval_loss": Infinity,
259
+ "eval_runtime": 181.5241,
260
+ "eval_samples_per_second": 38.684,
261
+ "eval_steps_per_second": 4.837,
262
+ "eval_wer": 0.3613935309623051,
263
+ "step": 4400
264
+ },
265
+ {
266
+ "epoch": 1.7374517374517375,
267
+ "grad_norm": 0.7301501631736755,
268
+ "learning_rate": 8.247272727272728e-05,
269
+ "loss": 0.4967,
270
+ "step": 4500
271
+ },
272
+ {
273
+ "epoch": 1.776061776061776,
274
+ "eval_loss": Infinity,
275
+ "eval_runtime": 181.6896,
276
+ "eval_samples_per_second": 38.648,
277
+ "eval_steps_per_second": 4.832,
278
+ "eval_wer": 0.3530313064028151,
279
+ "step": 4600
280
+ },
281
+ {
282
+ "epoch": 1.8532818532818531,
283
+ "eval_loss": Infinity,
284
+ "eval_runtime": 184.0459,
285
+ "eval_samples_per_second": 38.154,
286
+ "eval_steps_per_second": 4.771,
287
+ "eval_wer": 0.34805812005765263,
288
+ "step": 4800
289
+ },
290
+ {
291
+ "epoch": 1.9305019305019306,
292
+ "grad_norm": 1.2918003797531128,
293
+ "learning_rate": 5.519999999999999e-05,
294
+ "loss": 0.4735,
295
+ "step": 5000
296
+ },
297
+ {
298
+ "epoch": 1.9305019305019306,
299
+ "eval_loss": Infinity,
300
+ "eval_runtime": 182.1737,
301
+ "eval_samples_per_second": 38.546,
302
+ "eval_steps_per_second": 4.82,
303
+ "eval_wer": 0.35057717528209525,
304
+ "step": 5000
305
+ },
306
+ {
307
+ "epoch": 2.0077220077220077,
308
+ "eval_loss": Infinity,
309
+ "eval_runtime": 182.5384,
310
+ "eval_samples_per_second": 38.469,
311
+ "eval_steps_per_second": 4.81,
312
+ "eval_wer": 0.3432277667406801,
313
+ "step": 5200
314
+ },
315
+ {
316
+ "epoch": 2.0849420849420848,
317
+ "eval_loss": Infinity,
318
+ "eval_runtime": 182.9254,
319
+ "eval_samples_per_second": 38.387,
320
+ "eval_steps_per_second": 4.8,
321
+ "eval_wer": 0.33689117421733994,
322
+ "step": 5400
323
+ },
324
+ {
325
+ "epoch": 2.1235521235521237,
326
+ "grad_norm": 0.6086732745170593,
327
+ "learning_rate": 2.7927272727272724e-05,
328
+ "loss": 0.4244,
329
+ "step": 5500
330
+ },
331
+ {
332
+ "epoch": 2.1621621621621623,
333
+ "eval_loss": Infinity,
334
+ "eval_runtime": 182.4452,
335
+ "eval_samples_per_second": 38.488,
336
+ "eval_steps_per_second": 4.812,
337
+ "eval_wer": 0.32959370495890306,
338
+ "step": 5600
339
+ },
340
+ {
341
+ "epoch": 2.2393822393822393,
342
+ "eval_loss": Infinity,
343
+ "eval_runtime": 182.1862,
344
+ "eval_samples_per_second": 38.543,
345
+ "eval_steps_per_second": 4.819,
346
+ "eval_wer": 0.32954176567592486,
347
+ "step": 5800
348
+ },
349
+ {
350
+ "epoch": 2.3166023166023164,
351
+ "grad_norm": 1.0216798782348633,
352
+ "learning_rate": 6.545454545454546e-07,
353
+ "loss": 0.3674,
354
+ "step": 6000
355
+ },
356
+ {
357
+ "epoch": 2.3166023166023164,
358
+ "eval_loss": Infinity,
359
+ "eval_runtime": 182.1846,
360
+ "eval_samples_per_second": 38.543,
361
+ "eval_steps_per_second": 4.819,
362
+ "eval_wer": 0.3289055094594419,
363
+ "step": 6000
364
+ },
365
+ {
366
+ "epoch": 2.3166023166023164,
367
+ "step": 6000,
368
+ "total_flos": 1.261526897313927e+19,
369
+ "train_loss": 0.8721037826538086,
370
+ "train_runtime": 9008.7975,
371
+ "train_samples_per_second": 10.656,
372
+ "train_steps_per_second": 0.666
373
  }
374
  ],
375
  "logging_steps": 500,
376
+ "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
+ "num_train_epochs": 3,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
 
389
  "attributes": {}
390
  }
391
  },
392
+ "total_flos": 1.261526897313927e+19,
393
  "train_batch_size": 16,
394
  "trial_name": null,
395
  "trial_params": null