mzboito committed on
Commit
a26c659
1 Parent(s): 814c648

trainer_state

Browse files
Files changed (2) hide show
  1. README.md +2 -1
  2. trainer_state.json +227 -0
README.md CHANGED
@@ -38,8 +38,9 @@ Please check our blog post available at: TBD
38
  | **CommonVoice 17** | 16.0 | 4.9 | 19.0 | 6.5 |
39
 
40
  ## Training Parameters
 
41
  This is a [mHuBERT-147](https://huggingface.co/utter-project/mHuBERT-147) ASR fine-tuned model.
42
- The training parameters are available in [config.yaml](https://huggingface.co/naver/mHuBERT-147-ASR-fr/blob/main/config.yaml).
43
  We highlight the use of 0.3 for hubert.final_dropout, which we found to be very helpful in convergence. We also use fp32 training, as we found fp16 training to be unstable.
44
 
45
  ## ASR Model Class
 
38
  | **CommonVoice 17** | 16.0 | 4.9 | 19.0 | 6.5 |
39
 
40
  ## Training Parameters
41
+
42
  This is a [mHuBERT-147](https://huggingface.co/utter-project/mHuBERT-147) ASR fine-tuned model.
43
+ The training parameters are available in [config.json](https://huggingface.co/naver/mHuBERT-147-ASR-fr/blob/main/config.json).
44
  We highlight the use of 0.3 for hubert.final_dropout, which we found to be very helpful in convergence. We also use fp32 training, as we found fp16 training to be unstable.
45
 
46
  ## ASR Model Class
trainer_state.json ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.602455701786633,
3
+ "best_model_checkpoint": "checkpoint-130000",
4
+ "epoch": 98.21512890735669,
5
+ "eval_steps": 10000,
6
+ "global_step": 130000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 7.56,
13
+ "learning_rate": 3.7792894935752085e-05,
14
+ "loss": 3.1181,
15
+ "step": 10000
16
+ },
17
+ {
18
+ "epoch": 7.56,
19
+ "eval_cer": 12.22214052888268,
20
+ "eval_loss": 0.39288055896759033,
21
+ "eval_runtime": 80.6046,
22
+ "eval_samples_per_second": 6.178,
23
+ "eval_steps_per_second": 3.089,
24
+ "eval_wer": 38.847117794486216,
25
+ "step": 10000
26
+ },
27
+ {
28
+ "epoch": 15.11,
29
+ "learning_rate": 7.558578987150417e-05,
30
+ "loss": 0.4086,
31
+ "step": 20000
32
+ },
33
+ {
34
+ "epoch": 15.11,
35
+ "eval_cer": 9.646349533122564,
36
+ "eval_loss": 0.3199174404144287,
37
+ "eval_runtime": 78.777,
38
+ "eval_samples_per_second": 6.322,
39
+ "eval_steps_per_second": 3.161,
40
+ "eval_wer": 30.367585630743527,
41
+ "step": 20000
42
+ },
43
+ {
44
+ "epoch": 22.67,
45
+ "learning_rate": 9.665532879818595e-05,
46
+ "loss": 0.3126,
47
+ "step": 30000
48
+ },
49
+ {
50
+ "epoch": 22.67,
51
+ "eval_cer": 9.011592284881013,
52
+ "eval_loss": 0.3147233724594116,
53
+ "eval_runtime": 77.284,
54
+ "eval_samples_per_second": 6.444,
55
+ "eval_steps_per_second": 3.222,
56
+ "eval_wer": 28.390420495683657,
57
+ "step": 30000
58
+ },
59
+ {
60
+ "epoch": 30.22,
61
+ "learning_rate": 8.720710506424793e-05,
62
+ "loss": 0.2509,
63
+ "step": 40000
64
+ },
65
+ {
66
+ "epoch": 30.22,
67
+ "eval_cer": 8.134205818199643,
68
+ "eval_loss": 0.3039480447769165,
69
+ "eval_runtime": 68.0983,
70
+ "eval_samples_per_second": 7.313,
71
+ "eval_steps_per_second": 3.656,
72
+ "eval_wer": 26.427179058758004,
73
+ "step": 40000
74
+ },
75
+ {
76
+ "epoch": 37.78,
77
+ "learning_rate": 7.77588813303099e-05,
78
+ "loss": 0.2084,
79
+ "step": 50000
80
+ },
81
+ {
82
+ "epoch": 37.78,
83
+ "eval_cer": 7.7028649854177385,
84
+ "eval_loss": 0.2937542498111725,
85
+ "eval_runtime": 66.6192,
86
+ "eval_samples_per_second": 7.475,
87
+ "eval_steps_per_second": 3.738,
88
+ "eval_wer": 25.382901698691175,
89
+ "step": 50000
90
+ },
91
+ {
92
+ "epoch": 45.33,
93
+ "learning_rate": 6.831065759637189e-05,
94
+ "loss": 0.1794,
95
+ "step": 60000
96
+ },
97
+ {
98
+ "epoch": 45.33,
99
+ "eval_cer": 7.658750582065044,
100
+ "eval_loss": 0.3241848349571228,
101
+ "eval_runtime": 65.9465,
102
+ "eval_samples_per_second": 7.552,
103
+ "eval_steps_per_second": 3.776,
104
+ "eval_wer": 24.770258980785297,
105
+ "step": 60000
106
+ },
107
+ {
108
+ "epoch": 52.89,
109
+ "learning_rate": 5.886243386243386e-05,
110
+ "loss": 0.1566,
111
+ "step": 70000
112
+ },
113
+ {
114
+ "epoch": 52.89,
115
+ "eval_cer": 7.2984829546847045,
116
+ "eval_loss": 0.33441099524497986,
117
+ "eval_runtime": 66.4973,
118
+ "eval_samples_per_second": 7.489,
119
+ "eval_steps_per_second": 3.745,
120
+ "eval_wer": 24.33862433862434,
121
+ "step": 70000
122
+ },
123
+ {
124
+ "epoch": 60.44,
125
+ "learning_rate": 4.9414210128495846e-05,
126
+ "loss": 0.1381,
127
+ "step": 80000
128
+ },
129
+ {
130
+ "epoch": 60.44,
131
+ "eval_cer": 7.394064161948877,
132
+ "eval_loss": 0.3713204860687256,
133
+ "eval_runtime": 66.5533,
134
+ "eval_samples_per_second": 7.483,
135
+ "eval_steps_per_second": 3.741,
136
+ "eval_wer": 23.62851573377889,
137
+ "step": 80000
138
+ },
139
+ {
140
+ "epoch": 68.0,
141
+ "learning_rate": 3.9965986394557825e-05,
142
+ "loss": 0.1227,
143
+ "step": 90000
144
+ },
145
+ {
146
+ "epoch": 68.0,
147
+ "eval_cer": 7.109771340342622,
148
+ "eval_loss": 0.3827340006828308,
149
+ "eval_runtime": 66.2381,
150
+ "eval_samples_per_second": 7.518,
151
+ "eval_steps_per_second": 3.759,
152
+ "eval_wer": 22.946254525201894,
153
+ "step": 90000
154
+ },
155
+ {
156
+ "epoch": 75.55,
157
+ "learning_rate": 3.0517762660619804e-05,
158
+ "loss": 0.1097,
159
+ "step": 100000
160
+ },
161
+ {
162
+ "epoch": 75.55,
163
+ "eval_cer": 7.124476141460187,
164
+ "eval_loss": 0.415243923664093,
165
+ "eval_runtime": 149.6844,
166
+ "eval_samples_per_second": 3.327,
167
+ "eval_steps_per_second": 1.663,
168
+ "eval_wer": 22.988025619604567,
169
+ "step": 100000
170
+ },
171
+ {
172
+ "epoch": 83.11,
173
+ "learning_rate": 2.1069538926681782e-05,
174
+ "loss": 0.0988,
175
+ "step": 110000
176
+ },
177
+ {
178
+ "epoch": 83.11,
179
+ "eval_cer": 6.854888120971498,
180
+ "eval_loss": 0.4487506151199341,
181
+ "eval_runtime": 66.6595,
182
+ "eval_samples_per_second": 7.471,
183
+ "eval_steps_per_second": 3.735,
184
+ "eval_wer": 22.570314675577833,
185
+ "step": 110000
186
+ },
187
+ {
188
+ "epoch": 90.66,
189
+ "learning_rate": 1.1621315192743764e-05,
190
+ "loss": 0.0896,
191
+ "step": 120000
192
+ },
193
+ {
194
+ "epoch": 90.66,
195
+ "eval_cer": 6.808322917432541,
196
+ "eval_loss": 0.44282594323158264,
197
+ "eval_runtime": 66.7552,
198
+ "eval_samples_per_second": 7.46,
199
+ "eval_steps_per_second": 3.73,
200
+ "eval_wer": 21.99944305207463,
201
+ "step": 120000
202
+ },
203
+ {
204
+ "epoch": 98.22,
205
+ "learning_rate": 2.1730914588057445e-06,
206
+ "loss": 0.0823,
207
+ "step": 130000
208
+ },
209
+ {
210
+ "epoch": 98.22,
211
+ "eval_cer": 6.602455701786633,
212
+ "eval_loss": 0.4513276517391205,
213
+ "eval_runtime": 66.6171,
214
+ "eval_samples_per_second": 7.476,
215
+ "eval_steps_per_second": 3.738,
216
+ "eval_wer": 21.74881648565859,
217
+ "step": 130000
218
+ }
219
+ ],
220
+ "logging_steps": 10000,
221
+ "max_steps": 132300,
222
+ "num_train_epochs": 100,
223
+ "save_steps": 10000,
224
+ "total_flos": 4.063479904391249e+20,
225
+ "trial_name": null,
226
+ "trial_params": null
227
+ }