gigant committed
Commit 5661efc
1 Parent(s): 8481105

Training in progress, step 1000

checkpoint-1000/config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "openai/whisper-small",
+  "_name_or_path": "openai/whisper-medium",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "architectures": [
@@ -11,17 +11,17 @@
     50257
   ],
   "bos_token_id": 50257,
-  "d_model": 768,
-  "decoder_attention_heads": 12,
-  "decoder_ffn_dim": 3072,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
   "decoder_layerdrop": 0.0,
-  "decoder_layers": 12,
+  "decoder_layers": 24,
   "decoder_start_token_id": 50258,
   "dropout": 0.0,
-  "encoder_attention_heads": 12,
-  "encoder_ffn_dim": 3072,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
   "encoder_layerdrop": 0.0,
-  "encoder_layers": 12,
+  "encoder_layers": 24,
   "eos_token_id": 50257,
   "forced_decoder_ids": null,
   "init_std": 0.02,
@@ -30,7 +30,7 @@
   "max_source_positions": 1500,
   "max_target_positions": 448,
   "model_type": "whisper",
-  "num_hidden_layers": 12,
+  "num_hidden_layers": 24,
   "num_mel_bins": 80,
   "pad_token_id": 50257,
   "scale_embedding": false,
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87cdde1c69ccea77fc2d10883218b255caf5fd29a0d50ea75faa5550ffef7f19
-size 1934161093
+oid sha256:af49b47896328ac338c2bcc0a0a9633edbceecd04542cc94134d901a533e16f8
+size 6111428695
checkpoint-1000/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f90976fea374ad783c2542b5b16cffb3e21309e241eca73e9211168253b120e2
-size 967102601
+oid sha256:6ab09aea52de50a7a60e8ffee379732eaeae6a128802ab70f0a79b4df77d8852
+size 3055754841
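The .pt/.bin entries in this commit are Git LFS pointer files: each stores only the blob's SHA-256 and byte size, which is also why the optimizer state and model weights roughly tripled with the move to whisper-medium. A minimal sketch, assuming the LFS objects have been pulled locally (e.g. via `git lfs pull`), for checking a downloaded file against its pointer:

```python
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file at `path` matches the LFS pointer's size and SHA-256."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new pointer for checkpoint-1000/pytorch_model.bin above.
ok = verify_lfs_object(
    "checkpoint-1000/pytorch_model.bin",
    "6ab09aea52de50a7a60e8ffee379732eaeae6a128802ab70f0a79b4df77d8852",
    3055754841,
)
print("pointer matches blob:", ok)
```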
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cd183ac6e2c1c51a5ab296c5ecccf98b4ba8d20eae77a8c910a6c8ebf6f3dc4
+oid sha256:fcec21c75f2a56d1a885ca69805f92f00488b77824a9c72d1f47bbf304ea23f4
 size 14575
checkpoint-1000/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2292b0497ddb0554f3fc4518d7cc9b046879e509117234d9fc7434b7c01df20c
+oid sha256:a18a0387d70684d7f96dff24b357b2696f9665783ff9db9a34d4aff631f8b03a
 size 557
checkpoint-1000/tokenizer_config.json CHANGED
@@ -19,7 +19,7 @@
   },
   "errors": "replace",
   "model_max_length": 1024,
-  "name_or_path": "openai/whisper-small",
+  "name_or_path": "openai/whisper-medium",
   "pad_token": null,
   "processor_class": "WhisperProcessor",
   "return_attention_mask": false,
checkpoint-1000/trainer_state.json CHANGED
@@ -1,265 +1,274 @@
 {
-  "best_metric": 15.703064185124902,
+  "best_metric": 7.904606892647371,
   "best_model_checkpoint": "./checkpoint-1000",
-  "epoch": 5.026,
+  "epoch": 2.00802407221665,
   "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.01,
+      "epoch": 0.05,
       "learning_rate": 4.2000000000000006e-07,
-      "loss": 4.163,
+      "loss": 2.983,
       "step": 25
     },
     {
-      "epoch": 0.01,
+      "epoch": 0.1,
       "learning_rate": 9.200000000000001e-07,
-      "loss": 3.2393,
+      "loss": 2.4843,
       "step": 50
     },
     {
-      "epoch": 0.01,
+      "epoch": 0.15,
       "learning_rate": 1.42e-06,
-      "loss": 2.1044,
+      "loss": 1.6336,
       "step": 75
     },
     {
-      "epoch": 0.02,
+      "epoch": 0.2,
       "learning_rate": 1.9200000000000003e-06,
-      "loss": 1.3638,
+      "loss": 1.1537,
       "step": 100
     },
     {
-      "epoch": 0.03,
+      "epoch": 0.25,
       "learning_rate": 2.42e-06,
-      "loss": 1.1844,
+      "loss": 0.8284,
       "step": 125
     },
     {
-      "epoch": 0.03,
+      "epoch": 0.3,
       "learning_rate": 2.92e-06,
-      "loss": 1.009,
+      "loss": 0.2948,
       "step": 150
     },
     {
-      "epoch": 1.0,
+      "epoch": 0.35,
       "learning_rate": 3.4200000000000007e-06,
-      "loss": 0.8762,
+      "loss": 0.1898,
       "step": 175
     },
     {
-      "epoch": 1.01,
+      "epoch": 0.4,
       "learning_rate": 3.920000000000001e-06,
-      "loss": 0.8188,
+      "loss": 0.1784,
       "step": 200
     },
     {
-      "epoch": 1.01,
+      "epoch": 0.45,
       "learning_rate": 4.42e-06,
-      "loss": 0.6994,
+      "loss": 0.1658,
       "step": 225
     },
     {
-      "epoch": 1.02,
+      "epoch": 0.5,
       "learning_rate": 4.92e-06,
-      "loss": 0.3518,
+      "loss": 0.1603,
       "step": 250
     },
     {
-      "epoch": 1.02,
+      "epoch": 0.55,
       "learning_rate": 5.420000000000001e-06,
-      "loss": 0.2302,
+      "loss": 0.1459,
       "step": 275
     },
     {
-      "epoch": 1.03,
+      "epoch": 0.6,
       "learning_rate": 5.92e-06,
-      "loss": 0.2269,
+      "loss": 0.1448,
       "step": 300
     },
     {
-      "epoch": 1.03,
+      "epoch": 0.65,
       "learning_rate": 6.42e-06,
-      "loss": 0.1694,
+      "loss": 0.1413,
       "step": 325
     },
     {
-      "epoch": 2.0,
+      "epoch": 0.7,
       "learning_rate": 6.92e-06,
-      "loss": 0.1352,
+      "loss": 0.1349,
       "step": 350
     },
     {
-      "epoch": 2.01,
+      "epoch": 0.75,
       "learning_rate": 7.420000000000001e-06,
-      "loss": 0.1625,
+      "loss": 0.1372,
       "step": 375
     },
     {
-      "epoch": 2.01,
+      "epoch": 0.8,
       "learning_rate": 7.92e-06,
-      "loss": 0.1407,
+      "loss": 0.1304,
       "step": 400
     },
     {
-      "epoch": 2.02,
+      "epoch": 0.85,
       "learning_rate": 8.42e-06,
-      "loss": 0.1125,
+      "loss": 0.128,
       "step": 425
     },
     {
-      "epoch": 2.02,
+      "epoch": 0.9,
       "learning_rate": 8.920000000000001e-06,
-      "loss": 0.1035,
+      "loss": 0.1228,
       "step": 450
     },
     {
-      "epoch": 2.03,
+      "epoch": 0.95,
       "learning_rate": 9.42e-06,
-      "loss": 0.1027,
+      "loss": 0.1198,
       "step": 475
     },
     {
-      "epoch": 2.03,
+      "epoch": 1.0,
       "learning_rate": 9.920000000000002e-06,
-      "loss": 0.0734,
+      "loss": 0.1176,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.10957513749599457,
+      "eval_runtime": 2168.1929,
+      "eval_samples_per_second": 1.78,
+      "eval_steps_per_second": 0.056,
+      "eval_wer": 10.749416177199066,
       "step": 500
     },
     {
-      "epoch": 3.0,
+      "epoch": 1.05,
       "learning_rate": 9.953333333333333e-06,
-      "loss": 0.0636,
+      "loss": 0.0829,
       "step": 525
     },
     {
-      "epoch": 3.01,
+      "epoch": 1.1,
       "learning_rate": 9.89777777777778e-06,
-      "loss": 0.0785,
+      "loss": 0.0721,
       "step": 550
     },
     {
-      "epoch": 3.01,
+      "epoch": 1.15,
       "learning_rate": 9.842222222222223e-06,
-      "loss": 0.0641,
+      "loss": 0.0747,
       "step": 575
     },
     {
-      "epoch": 3.02,
+      "epoch": 1.2,
       "learning_rate": 9.786666666666667e-06,
-      "loss": 0.053,
+      "loss": 0.0781,
       "step": 600
     },
     {
-      "epoch": 3.02,
+      "epoch": 1.25,
       "learning_rate": 9.731111111111113e-06,
-      "loss": 0.0498,
+      "loss": 0.0769,
       "step": 625
     },
     {
-      "epoch": 3.03,
+      "epoch": 1.3,
       "learning_rate": 9.675555555555555e-06,
-      "loss": 0.0464,
+      "loss": 0.0755,
       "step": 650
     },
     {
-      "epoch": 3.03,
+      "epoch": 1.36,
       "learning_rate": 9.620000000000001e-06,
-      "loss": 0.0349,
+      "loss": 0.0805,
       "step": 675
     },
     {
-      "epoch": 4.0,
+      "epoch": 1.41,
       "learning_rate": 9.564444444444445e-06,
-      "loss": 0.0306,
+      "loss": 0.0733,
       "step": 700
     },
     {
-      "epoch": 4.01,
+      "epoch": 1.46,
       "learning_rate": 9.508888888888889e-06,
-      "loss": 0.037,
+      "loss": 0.0696,
       "step": 725
     },
     {
-      "epoch": 4.01,
+      "epoch": 1.51,
       "learning_rate": 9.453333333333335e-06,
-      "loss": 0.0337,
+      "loss": 0.0682,
       "step": 750
     },
     {
-      "epoch": 4.02,
+      "epoch": 1.56,
       "learning_rate": 9.397777777777779e-06,
-      "loss": 0.0263,
+      "loss": 0.0727,
       "step": 775
     },
     {
-      "epoch": 4.02,
+      "epoch": 1.61,
       "learning_rate": 9.342222222222223e-06,
-      "loss": 0.0279,
+      "loss": 0.0724,
       "step": 800
     },
     {
-      "epoch": 4.03,
+      "epoch": 1.66,
       "learning_rate": 9.286666666666667e-06,
-      "loss": 0.0247,
+      "loss": 0.0688,
       "step": 825
     },
     {
-      "epoch": 4.03,
+      "epoch": 1.71,
       "learning_rate": 9.231111111111111e-06,
-      "loss": 0.0171,
+      "loss": 0.0672,
       "step": 850
     },
     {
-      "epoch": 5.0,
+      "epoch": 1.76,
       "learning_rate": 9.175555555555557e-06,
-      "loss": 0.017,
+      "loss": 0.0705,
       "step": 875
     },
     {
-      "epoch": 5.01,
+      "epoch": 1.81,
       "learning_rate": 9.12e-06,
-      "loss": 0.0213,
+      "loss": 0.0714,
       "step": 900
     },
     {
-      "epoch": 5.01,
+      "epoch": 1.86,
       "learning_rate": 9.064444444444447e-06,
-      "loss": 0.0173,
+      "loss": 0.0635,
       "step": 925
     },
     {
-      "epoch": 5.02,
+      "epoch": 1.91,
       "learning_rate": 9.008888888888889e-06,
-      "loss": 0.0156,
+      "loss": 0.0639,
       "step": 950
     },
     {
-      "epoch": 5.02,
+      "epoch": 1.96,
       "learning_rate": 8.953333333333335e-06,
-      "loss": 0.0162,
+      "loss": 0.0728,
       "step": 975
     },
     {
-      "epoch": 5.03,
+      "epoch": 2.01,
       "learning_rate": 8.897777777777779e-06,
-      "loss": 0.0137,
+      "loss": 0.0605,
       "step": 1000
     },
     {
-      "epoch": 5.03,
-      "eval_loss": 0.2068425565958023,
-      "eval_runtime": 1070.693,
-      "eval_samples_per_second": 3.604,
-      "eval_steps_per_second": 0.451,
-      "eval_wer": 15.703064185124902,
+      "epoch": 2.01,
+      "eval_loss": 0.08087032288312912,
+      "eval_runtime": 2054.0453,
+      "eval_samples_per_second": 1.879,
+      "eval_steps_per_second": 0.059,
+      "eval_wer": 7.904606892647371,
       "step": 1000
     }
   ],
   "max_steps": 5000,
-  "num_train_epochs": 9223372036854775807,
-  "total_flos": 1.84290636644352e+19,
+  "num_train_epochs": 11,
+  "total_flos": 6.532075529109504e+19,
   "trial_name": null,
   "trial_params": null
 }
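The new trainer_state.json reflects the larger run: 1000 steps now correspond to roughly 2 epochs instead of 5, an extra evaluation entry is logged at step 500, and eval WER at step 1000 improves from 15.70 to 7.90. A minimal sketch, assuming the checkpoint directory is available locally, for pulling the evaluation entries out of this file:

```python
import json

# Path is an assumption: the trainer state committed in this checkpoint.
with open("checkpoint-1000/trainer_state.json") as f:
    state = json.load(f)

print("best metric (WER):", state["best_metric"])         # 7.904606892647371
print("best checkpoint:", state["best_model_checkpoint"])  # ./checkpoint-1000

# Evaluation records are the log_history entries carrying eval_* keys.
for entry in state["log_history"]:
    if "eval_wer" in entry:
        print(f"step {entry['step']}: eval_loss={entry['eval_loss']:.4f}, "
              f"eval_wer={entry['eval_wer']:.2f}")
```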
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:742d6336a47261393eaf2aa9094d49bc3919ce81a5a476e274ca09727f84d700
+oid sha256:e743affccad386edf49daf1d2a83ab221a7a5d57d4c0eb2be98eb95698bbbc49
 size 3515
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b834663a22bdd000f5d4ea4c7d1ea42086b90f03e3eb67ab3e620a417d35aab
3
  size 3055754841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab09aea52de50a7a60e8ffee379732eaeae6a128802ab70f0a79b4df77d8852
3
  size 3055754841