arun100 committed on
Commit
2f5ed80
1 Parent(s): 7972beb

End of training

Browse files
README.md CHANGED
@@ -1,39 +1,42 @@
1
  ---
 
 
2
  license: apache-2.0
3
  base_model: xmzhu/whisper-tiny-zh
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - common_voice_16_0
8
  metrics:
9
  - wer
10
  model-index:
11
- - name: xmzhu/whisper-tiny-zh
12
  results:
13
  - task:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
- name: common_voice_16_0
18
- type: common_voice_16_0
19
  config: zh-CN
20
  split: test
21
  args: zh-CN
22
  metrics:
23
  - name: Wer
24
  type: wer
25
- value: 91.15267507612005
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
29
  should probably proofread and complete it, then remove this comment. -->
30
 
31
- # xmzhu/whisper-tiny-zh
32
 
33
- This model is a fine-tuned version of [xmzhu/whisper-tiny-zh](https://huggingface.co/xmzhu/whisper-tiny-zh) on the common_voice_16_0 dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.5744
36
- - Wer: 91.1527
37
 
38
  ## Model description
39
 
 
1
  ---
2
+ language:
3
+ - zh
4
  license: apache-2.0
5
  base_model: xmzhu/whisper-tiny-zh
6
  tags:
7
+ - whisper-event
8
  - generated_from_trainer
9
  datasets:
10
+ - mozilla-foundation/common_voice_16_0
11
  metrics:
12
  - wer
13
  model-index:
14
+ - name: Whisper Base Chinese-Mandarin
15
  results:
16
  - task:
17
  name: Automatic Speech Recognition
18
  type: automatic-speech-recognition
19
  dataset:
20
+ name: mozilla-foundation/common_voice_16_0 zh-CN
21
+ type: mozilla-foundation/common_voice_16_0
22
  config: zh-CN
23
  split: test
24
  args: zh-CN
25
  metrics:
26
  - name: Wer
27
  type: wer
28
+ value: 91.12657677250978
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
32
  should probably proofread and complete it, then remove this comment. -->
33
 
34
+ # Whisper Base Chinese-Mandarin
35
 
36
+ This model is a fine-tuned version of [xmzhu/whisper-tiny-zh](https://huggingface.co/xmzhu/whisper-tiny-zh) on the mozilla-foundation/common_voice_16_0 zh-CN dataset.
37
  It achieves the following results on the evaluation set:
38
+ - Loss: 0.5759
39
+ - Wer: 91.1266
40
 
41
  ## Model description
42
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.5759375691413879,
4
+ "eval_runtime": 866.9447,
5
+ "eval_samples_per_second": 12.257,
6
+ "eval_steps_per_second": 0.384,
7
+ "eval_wer": 91.12657677250978,
8
+ "train_loss": 0.640758861541748,
9
+ "train_runtime": 6192.7747,
10
+ "train_samples_per_second": 10.335,
11
+ "train_steps_per_second": 0.161
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.5759375691413879,
4
+ "eval_runtime": 866.9447,
5
+ "eval_samples_per_second": 12.257,
6
+ "eval_steps_per_second": 0.384,
7
+ "eval_wer": 91.12657677250978
8
+ }
runs/Apr21_23-18-57_ip-172-31-23-125/events.out.tfevents.1713748612.ip-172-31-23-125.1270.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:969140fd5a75b1dc815fdf24a8e1addb8d39558daa30c3a1f5914c303c09b3c7
3
+ size 406
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.640758861541748,
4
+ "train_runtime": 6192.7747,
5
+ "train_samples_per_second": 10.335,
6
+ "train_steps_per_second": 0.161
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 91.12657677250978,
3
+ "best_model_checkpoint": "./checkpoint-600",
4
+ "epoch": 3.0025,
5
+ "eval_steps": 200,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 6e-08,
14
+ "loss": 0.6521,
15
+ "step": 25
16
+ },
17
+ {
18
+ "epoch": 0.05,
19
+ "learning_rate": 1.2e-07,
20
+ "loss": 0.6708,
21
+ "step": 50
22
+ },
23
+ {
24
+ "epoch": 0.07,
25
+ "learning_rate": 1.825e-07,
26
+ "loss": 0.6688,
27
+ "step": 75
28
+ },
29
+ {
30
+ "epoch": 0.1,
31
+ "learning_rate": 2.45e-07,
32
+ "loss": 0.7849,
33
+ "step": 100
34
+ },
35
+ {
36
+ "epoch": 0.12,
37
+ "learning_rate": 3.0749999999999997e-07,
38
+ "loss": 0.6421,
39
+ "step": 125
40
+ },
41
+ {
42
+ "epoch": 0.15,
43
+ "learning_rate": 3.7e-07,
44
+ "loss": 0.717,
45
+ "step": 150
46
+ },
47
+ {
48
+ "epoch": 0.17,
49
+ "learning_rate": 4.325e-07,
50
+ "loss": 0.786,
51
+ "step": 175
52
+ },
53
+ {
54
+ "epoch": 0.2,
55
+ "learning_rate": 4.95e-07,
56
+ "loss": 0.6689,
57
+ "step": 200
58
+ },
59
+ {
60
+ "epoch": 0.2,
61
+ "eval_loss": 0.5854436159133911,
62
+ "eval_runtime": 839.3873,
63
+ "eval_samples_per_second": 12.659,
64
+ "eval_steps_per_second": 0.397,
65
+ "eval_wer": 91.63114397564158,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.23,
70
+ "learning_rate": 4.856249999999999e-07,
71
+ "loss": 0.4781,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.25,
76
+ "learning_rate": 4.6999999999999995e-07,
77
+ "loss": 0.5582,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 0.28,
82
+ "learning_rate": 4.5437499999999994e-07,
83
+ "loss": 0.6674,
84
+ "step": 275
85
+ },
86
+ {
87
+ "epoch": 0.3,
88
+ "learning_rate": 4.3874999999999993e-07,
89
+ "loss": 0.606,
90
+ "step": 300
91
+ },
92
+ {
93
+ "epoch": 0.33,
94
+ "learning_rate": 4.2312499999999997e-07,
95
+ "loss": 0.6115,
96
+ "step": 325
97
+ },
98
+ {
99
+ "epoch": 1.02,
100
+ "learning_rate": 4.0749999999999996e-07,
101
+ "loss": 0.565,
102
+ "step": 350
103
+ },
104
+ {
105
+ "epoch": 1.04,
106
+ "learning_rate": 3.9187499999999995e-07,
107
+ "loss": 0.704,
108
+ "step": 375
109
+ },
110
+ {
111
+ "epoch": 1.07,
112
+ "learning_rate": 3.7624999999999994e-07,
113
+ "loss": 0.6314,
114
+ "step": 400
115
+ },
116
+ {
117
+ "epoch": 1.07,
118
+ "eval_loss": 0.5791469216346741,
119
+ "eval_runtime": 835.5799,
120
+ "eval_samples_per_second": 12.717,
121
+ "eval_steps_per_second": 0.399,
122
+ "eval_wer": 91.17877337973032,
123
+ "step": 400
124
+ },
125
+ {
126
+ "epoch": 1.09,
127
+ "learning_rate": 3.6062499999999994e-07,
128
+ "loss": 0.7393,
129
+ "step": 425
130
+ },
131
+ {
132
+ "epoch": 1.12,
133
+ "learning_rate": 3.45e-07,
134
+ "loss": 0.6695,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 1.14,
139
+ "learning_rate": 3.2937499999999997e-07,
140
+ "loss": 0.7078,
141
+ "step": 475
142
+ },
143
+ {
144
+ "epoch": 1.17,
145
+ "learning_rate": 3.1374999999999996e-07,
146
+ "loss": 0.7056,
147
+ "step": 500
148
+ },
149
+ {
150
+ "epoch": 1.19,
151
+ "learning_rate": 2.9812499999999995e-07,
152
+ "loss": 0.7321,
153
+ "step": 525
154
+ },
155
+ {
156
+ "epoch": 1.22,
157
+ "learning_rate": 2.8249999999999994e-07,
158
+ "loss": 0.5089,
159
+ "step": 550
160
+ },
161
+ {
162
+ "epoch": 1.24,
163
+ "learning_rate": 2.66875e-07,
164
+ "loss": 0.502,
165
+ "step": 575
166
+ },
167
+ {
168
+ "epoch": 1.27,
169
+ "learning_rate": 2.5125e-07,
170
+ "loss": 0.653,
171
+ "step": 600
172
+ },
173
+ {
174
+ "epoch": 1.27,
175
+ "eval_loss": 0.5759375691413879,
176
+ "eval_runtime": 857.6045,
177
+ "eval_samples_per_second": 12.39,
178
+ "eval_steps_per_second": 0.388,
179
+ "eval_wer": 91.12657677250978,
180
+ "step": 600
181
+ },
182
+ {
183
+ "epoch": 1.29,
184
+ "learning_rate": 2.35625e-07,
185
+ "loss": 0.5852,
186
+ "step": 625
187
+ },
188
+ {
189
+ "epoch": 1.32,
190
+ "learning_rate": 2.1999999999999998e-07,
191
+ "loss": 0.5856,
192
+ "step": 650
193
+ },
194
+ {
195
+ "epoch": 2.01,
196
+ "learning_rate": 2.04375e-07,
197
+ "loss": 0.5815,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 2.04,
202
+ "learning_rate": 1.8875e-07,
203
+ "loss": 0.706,
204
+ "step": 700
205
+ },
206
+ {
207
+ "epoch": 2.06,
208
+ "learning_rate": 1.7312499999999998e-07,
209
+ "loss": 0.587,
210
+ "step": 725
211
+ },
212
+ {
213
+ "epoch": 2.08,
214
+ "learning_rate": 1.575e-07,
215
+ "loss": 0.7652,
216
+ "step": 750
217
+ },
218
+ {
219
+ "epoch": 2.11,
220
+ "learning_rate": 1.41875e-07,
221
+ "loss": 0.6404,
222
+ "step": 775
223
+ },
224
+ {
225
+ "epoch": 2.13,
226
+ "learning_rate": 1.2624999999999998e-07,
227
+ "loss": 0.699,
228
+ "step": 800
229
+ },
230
+ {
231
+ "epoch": 2.13,
232
+ "eval_loss": 0.5749017596244812,
233
+ "eval_runtime": 839.408,
234
+ "eval_samples_per_second": 12.659,
235
+ "eval_steps_per_second": 0.397,
236
+ "eval_wer": 91.20487168334058,
237
+ "step": 800
238
+ },
239
+ {
240
+ "epoch": 2.16,
241
+ "learning_rate": 1.10625e-07,
242
+ "loss": 0.6785,
243
+ "step": 825
244
+ },
245
+ {
246
+ "epoch": 2.19,
247
+ "learning_rate": 9.499999999999999e-08,
248
+ "loss": 0.7806,
249
+ "step": 850
250
+ },
251
+ {
252
+ "epoch": 2.21,
253
+ "learning_rate": 7.9375e-08,
254
+ "loss": 0.5384,
255
+ "step": 875
256
+ },
257
+ {
258
+ "epoch": 2.23,
259
+ "learning_rate": 6.375e-08,
260
+ "loss": 0.4755,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 2.26,
265
+ "learning_rate": 4.8124999999999996e-08,
266
+ "loss": 0.623,
267
+ "step": 925
268
+ },
269
+ {
270
+ "epoch": 2.29,
271
+ "learning_rate": 3.25e-08,
272
+ "loss": 0.6052,
273
+ "step": 950
274
+ },
275
+ {
276
+ "epoch": 2.31,
277
+ "learning_rate": 1.6875e-08,
278
+ "loss": 0.5875,
279
+ "step": 975
280
+ },
281
+ {
282
+ "epoch": 3.0,
283
+ "learning_rate": 1.25e-09,
284
+ "loss": 0.5613,
285
+ "step": 1000
286
+ },
287
+ {
288
+ "epoch": 3.0,
289
+ "eval_loss": 0.5744211077690125,
290
+ "eval_runtime": 835.9541,
291
+ "eval_samples_per_second": 12.711,
292
+ "eval_steps_per_second": 0.398,
293
+ "eval_wer": 91.15267507612005,
294
+ "step": 1000
295
+ },
296
+ {
297
+ "epoch": 3.0,
298
+ "step": 1000,
299
+ "total_flos": 1.57354066132992e+18,
300
+ "train_loss": 0.640758861541748,
301
+ "train_runtime": 6192.7747,
302
+ "train_samples_per_second": 10.335,
303
+ "train_steps_per_second": 0.161
304
+ }
305
+ ],
306
+ "logging_steps": 25,
307
+ "max_steps": 1000,
308
+ "num_input_tokens_seen": 0,
309
+ "num_train_epochs": 9223372036854775807,
310
+ "save_steps": 200,
311
+ "total_flos": 1.57354066132992e+18,
312
+ "train_batch_size": 32,
313
+ "trial_name": null,
314
+ "trial_params": null
315
+ }