meiiny00 commited on
Commit
5a83ac3
1 Parent(s): 74c2299

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-large-v3-turbo",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50256
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 1280,
17
+ "decoder_attention_heads": 20,
18
+ "decoder_ffn_dim": 5120,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 4,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 20,
24
+ "encoder_ffn_dim": 5120,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 32,
27
+ "eos_token_id": 50257,
28
+ "init_std": 0.02,
29
+ "is_encoder_decoder": true,
30
+ "mask_feature_length": 10,
31
+ "mask_feature_min_masks": 0,
32
+ "mask_feature_prob": 0.0,
33
+ "mask_time_length": 10,
34
+ "mask_time_min_masks": 2,
35
+ "mask_time_prob": 0.05,
36
+ "max_source_positions": 1500,
37
+ "max_target_positions": 448,
38
+ "median_filter_width": 7,
39
+ "model_type": "whisper",
40
+ "num_hidden_layers": 32,
41
+ "num_mel_bins": 128,
42
+ "pad_token_id": 50257,
43
+ "scale_embedding": false,
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.45.1",
46
+ "use_cache": true,
47
+ "use_weighted_layer_sum": false,
48
+ "vocab_size": 51866
49
+ }
generation_config.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 2,
5
+ 4
6
+ ],
7
+ [
8
+ 2,
9
+ 11
10
+ ],
11
+ [
12
+ 3,
13
+ 3
14
+ ],
15
+ [
16
+ 3,
17
+ 6
18
+ ],
19
+ [
20
+ 3,
21
+ 11
22
+ ],
23
+ [
24
+ 3,
25
+ 14
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|yue|>": 50358,
136
+ "<|zh|>": 50260
137
+ },
138
+ "language": "Mongolian",
139
+ "max_initial_timestamp_index": 50,
140
+ "max_length": 448,
141
+ "no_timestamps_token_id": 50364,
142
+ "pad_token_id": 50257,
143
+ "prev_sot_token_id": 50362,
144
+ "return_timestamps": false,
145
+ "suppress_tokens": [
146
+ 1,
147
+ 2,
148
+ 7,
149
+ 8,
150
+ 9,
151
+ 10,
152
+ 14,
153
+ 25,
154
+ 26,
155
+ 27,
156
+ 28,
157
+ 29,
158
+ 31,
159
+ 58,
160
+ 59,
161
+ 60,
162
+ 61,
163
+ 62,
164
+ 63,
165
+ 90,
166
+ 91,
167
+ 92,
168
+ 93,
169
+ 359,
170
+ 503,
171
+ 522,
172
+ 542,
173
+ 873,
174
+ 893,
175
+ 902,
176
+ 918,
177
+ 922,
178
+ 931,
179
+ 1350,
180
+ 1853,
181
+ 1982,
182
+ 2460,
183
+ 2627,
184
+ 3246,
185
+ 3253,
186
+ 3268,
187
+ 3536,
188
+ 3846,
189
+ 3961,
190
+ 4183,
191
+ 4667,
192
+ 6585,
193
+ 6647,
194
+ 7273,
195
+ 9061,
196
+ 9383,
197
+ 10428,
198
+ 10929,
199
+ 11938,
200
+ 12033,
201
+ 12331,
202
+ 12562,
203
+ 13793,
204
+ 14157,
205
+ 14635,
206
+ 15265,
207
+ 15618,
208
+ 16553,
209
+ 16604,
210
+ 18362,
211
+ 18956,
212
+ 20075,
213
+ 21675,
214
+ 22520,
215
+ 26130,
216
+ 26161,
217
+ 26435,
218
+ 28279,
219
+ 29464,
220
+ 31650,
221
+ 32302,
222
+ 32470,
223
+ 36865,
224
+ 42863,
225
+ 47425,
226
+ 49870,
227
+ 50254,
228
+ 50258,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362,
233
+ 50363
234
+ ],
235
+ "task": "transcribe",
236
+ "task_to_id": {
237
+ "transcribe": 50360,
238
+ "translate": 50359
239
+ },
240
+ "transformers_version": "4.45.1"
241
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8aee478900f727946e3634b5cf15cc75be09e6d3af95bede3c87172ee207629
3
+ size 3235581408
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9accc831b22227a8e7a217b6f2d51e305e7f1ab095863193ad6bdd0ab594770b
3
+ size 6456162404
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 128,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a331edbe622d3be302f3a29820de21988f34c13f3c08b82fe8d4e50768b2dde
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd65f5c5ae513a1ca88894657fa81d516edccd9692a6884989dee9560a972a1
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,2345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 15.246076710047603,
3
+ "best_model_checkpoint": "./whisper-turbo/checkpoint-8000",
4
+ "epoch": 3.4057045551298426,
5
+ "eval_steps": 1000,
6
+ "global_step": 8000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.010642826734780758,
13
+ "grad_norm": 24.32292938232422,
14
+ "learning_rate": 2.4000000000000003e-07,
15
+ "loss": 2.3553,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.021285653469561516,
20
+ "grad_norm": 13.848426818847656,
21
+ "learning_rate": 4.900000000000001e-07,
22
+ "loss": 1.8027,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.031928480204342274,
27
+ "grad_norm": 12.110209465026855,
28
+ "learning_rate": 7.4e-07,
29
+ "loss": 1.43,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.04257130693912303,
34
+ "grad_norm": 11.48365306854248,
35
+ "learning_rate": 9.9e-07,
36
+ "loss": 1.2826,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.05321413367390379,
41
+ "grad_norm": 12.327783584594727,
42
+ "learning_rate": 1.2400000000000002e-06,
43
+ "loss": 1.0584,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.06385696040868455,
48
+ "grad_norm": 10.366923332214355,
49
+ "learning_rate": 1.4900000000000001e-06,
50
+ "loss": 0.9736,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.07449978714346531,
55
+ "grad_norm": 12.267701148986816,
56
+ "learning_rate": 1.74e-06,
57
+ "loss": 0.9528,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.08514261387824607,
62
+ "grad_norm": 10.672560691833496,
63
+ "learning_rate": 1.9900000000000004e-06,
64
+ "loss": 0.8758,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.09578544061302682,
69
+ "grad_norm": 11.000222206115723,
70
+ "learning_rate": 2.24e-06,
71
+ "loss": 0.7971,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.10642826734780758,
76
+ "grad_norm": 10.404646873474121,
77
+ "learning_rate": 2.4900000000000003e-06,
78
+ "loss": 0.7646,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.11707109408258834,
83
+ "grad_norm": 8.721226692199707,
84
+ "learning_rate": 2.7400000000000004e-06,
85
+ "loss": 0.7578,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.1277139208173691,
90
+ "grad_norm": 9.198404312133789,
91
+ "learning_rate": 2.99e-06,
92
+ "loss": 0.6623,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.13835674755214986,
97
+ "grad_norm": 9.394553184509277,
98
+ "learning_rate": 3.2400000000000003e-06,
99
+ "loss": 0.669,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.14899957428693061,
104
+ "grad_norm": 9.5685453414917,
105
+ "learning_rate": 3.49e-06,
106
+ "loss": 0.6431,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.15964240102171137,
111
+ "grad_norm": 10.325225830078125,
112
+ "learning_rate": 3.74e-06,
113
+ "loss": 0.6444,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.17028522775649213,
118
+ "grad_norm": 8.178572654724121,
119
+ "learning_rate": 3.990000000000001e-06,
120
+ "loss": 0.609,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.1809280544912729,
125
+ "grad_norm": 9.746500015258789,
126
+ "learning_rate": 4.24e-06,
127
+ "loss": 0.5755,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.19157088122605365,
132
+ "grad_norm": 8.11845874786377,
133
+ "learning_rate": 4.49e-06,
134
+ "loss": 0.5589,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.2022137079608344,
139
+ "grad_norm": 7.166477203369141,
140
+ "learning_rate": 4.74e-06,
141
+ "loss": 0.5578,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.21285653469561516,
146
+ "grad_norm": 7.681941032409668,
147
+ "learning_rate": 4.9900000000000005e-06,
148
+ "loss": 0.5076,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.22349936143039592,
153
+ "grad_norm": 8.438258171081543,
154
+ "learning_rate": 5.240000000000001e-06,
155
+ "loss": 0.5445,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.23414218816517668,
160
+ "grad_norm": 9.802384376525879,
161
+ "learning_rate": 5.490000000000001e-06,
162
+ "loss": 0.5098,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.24478501489995744,
167
+ "grad_norm": 7.197368144989014,
168
+ "learning_rate": 5.74e-06,
169
+ "loss": 0.4969,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.2554278416347382,
174
+ "grad_norm": 7.666371822357178,
175
+ "learning_rate": 5.99e-06,
176
+ "loss": 0.4861,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.2660706683695189,
181
+ "grad_norm": 7.324782371520996,
182
+ "learning_rate": 6.24e-06,
183
+ "loss": 0.481,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.2767134951042997,
188
+ "grad_norm": 8.649055480957031,
189
+ "learning_rate": 6.4900000000000005e-06,
190
+ "loss": 0.447,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.28735632183908044,
195
+ "grad_norm": 7.136589050292969,
196
+ "learning_rate": 6.740000000000001e-06,
197
+ "loss": 0.4778,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.29799914857386123,
202
+ "grad_norm": 7.153022289276123,
203
+ "learning_rate": 6.99e-06,
204
+ "loss": 0.4674,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.30864197530864196,
209
+ "grad_norm": 6.950058937072754,
210
+ "learning_rate": 7.24e-06,
211
+ "loss": 0.4434,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.31928480204342274,
216
+ "grad_norm": 7.339558124542236,
217
+ "learning_rate": 7.49e-06,
218
+ "loss": 0.4439,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.3299276287782035,
223
+ "grad_norm": 6.6849541664123535,
224
+ "learning_rate": 7.74e-06,
225
+ "loss": 0.442,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.34057045551298426,
230
+ "grad_norm": 7.065944194793701,
231
+ "learning_rate": 7.990000000000001e-06,
232
+ "loss": 0.4147,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.351213282247765,
237
+ "grad_norm": 6.242930889129639,
238
+ "learning_rate": 8.24e-06,
239
+ "loss": 0.3905,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.3618561089825458,
244
+ "grad_norm": 6.885308742523193,
245
+ "learning_rate": 8.48e-06,
246
+ "loss": 0.4077,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.3724989357173265,
251
+ "grad_norm": 5.589861869812012,
252
+ "learning_rate": 8.730000000000001e-06,
253
+ "loss": 0.4074,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.3831417624521073,
258
+ "grad_norm": 6.651442050933838,
259
+ "learning_rate": 8.98e-06,
260
+ "loss": 0.4013,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.393784589186888,
265
+ "grad_norm": 5.705496311187744,
266
+ "learning_rate": 9.230000000000001e-06,
267
+ "loss": 0.3897,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.4044274159216688,
272
+ "grad_norm": 6.6162333488464355,
273
+ "learning_rate": 9.48e-06,
274
+ "loss": 0.365,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.41507024265644954,
279
+ "grad_norm": 7.273537635803223,
280
+ "learning_rate": 9.73e-06,
281
+ "loss": 0.3924,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.4257130693912303,
286
+ "grad_norm": 6.101346969604492,
287
+ "learning_rate": 9.980000000000001e-06,
288
+ "loss": 0.3715,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.4257130693912303,
293
+ "eval_loss": 0.3456858992576599,
294
+ "eval_runtime": 2451.0815,
295
+ "eval_samples_per_second": 1.949,
296
+ "eval_steps_per_second": 0.061,
297
+ "eval_wer": 40.46916574655637,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 0.43635589612601106,
302
+ "grad_norm": 5.119203567504883,
303
+ "learning_rate": 9.967142857142858e-06,
304
+ "loss": 0.3875,
305
+ "step": 1025
306
+ },
307
+ {
308
+ "epoch": 0.44699872286079184,
309
+ "grad_norm": 5.488610744476318,
310
+ "learning_rate": 9.931428571428571e-06,
311
+ "loss": 0.3741,
312
+ "step": 1050
313
+ },
314
+ {
315
+ "epoch": 0.45764154959557257,
316
+ "grad_norm": 5.710753917694092,
317
+ "learning_rate": 9.895714285714287e-06,
318
+ "loss": 0.3622,
319
+ "step": 1075
320
+ },
321
+ {
322
+ "epoch": 0.46828437633035336,
323
+ "grad_norm": 5.7168192863464355,
324
+ "learning_rate": 9.86e-06,
325
+ "loss": 0.3652,
326
+ "step": 1100
327
+ },
328
+ {
329
+ "epoch": 0.4789272030651341,
330
+ "grad_norm": 5.204087734222412,
331
+ "learning_rate": 9.824285714285716e-06,
332
+ "loss": 0.3696,
333
+ "step": 1125
334
+ },
335
+ {
336
+ "epoch": 0.4895700297999149,
337
+ "grad_norm": 5.014431476593018,
338
+ "learning_rate": 9.78857142857143e-06,
339
+ "loss": 0.347,
340
+ "step": 1150
341
+ },
342
+ {
343
+ "epoch": 0.5002128565346956,
344
+ "grad_norm": 5.643191814422607,
345
+ "learning_rate": 9.752857142857143e-06,
346
+ "loss": 0.3681,
347
+ "step": 1175
348
+ },
349
+ {
350
+ "epoch": 0.5108556832694764,
351
+ "grad_norm": 6.40764045715332,
352
+ "learning_rate": 9.717142857142858e-06,
353
+ "loss": 0.34,
354
+ "step": 1200
355
+ },
356
+ {
357
+ "epoch": 0.5214985100042572,
358
+ "grad_norm": 5.89484167098999,
359
+ "learning_rate": 9.681428571428572e-06,
360
+ "loss": 0.3257,
361
+ "step": 1225
362
+ },
363
+ {
364
+ "epoch": 0.5321413367390379,
365
+ "grad_norm": 6.474817752838135,
366
+ "learning_rate": 9.645714285714286e-06,
367
+ "loss": 0.3398,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 0.5427841634738186,
372
+ "grad_norm": 6.272877216339111,
373
+ "learning_rate": 9.610000000000001e-06,
374
+ "loss": 0.3315,
375
+ "step": 1275
376
+ },
377
+ {
378
+ "epoch": 0.5534269902085994,
379
+ "grad_norm": 4.669580936431885,
380
+ "learning_rate": 9.574285714285715e-06,
381
+ "loss": 0.3296,
382
+ "step": 1300
383
+ },
384
+ {
385
+ "epoch": 0.5640698169433802,
386
+ "grad_norm": 4.576137065887451,
387
+ "learning_rate": 9.538571428571428e-06,
388
+ "loss": 0.3334,
389
+ "step": 1325
390
+ },
391
+ {
392
+ "epoch": 0.5747126436781609,
393
+ "grad_norm": 4.5423665046691895,
394
+ "learning_rate": 9.502857142857144e-06,
395
+ "loss": 0.3087,
396
+ "step": 1350
397
+ },
398
+ {
399
+ "epoch": 0.5853554704129417,
400
+ "grad_norm": 4.673890113830566,
401
+ "learning_rate": 9.467142857142857e-06,
402
+ "loss": 0.3286,
403
+ "step": 1375
404
+ },
405
+ {
406
+ "epoch": 0.5959982971477225,
407
+ "grad_norm": 4.775241374969482,
408
+ "learning_rate": 9.431428571428573e-06,
409
+ "loss": 0.3258,
410
+ "step": 1400
411
+ },
412
+ {
413
+ "epoch": 0.6066411238825032,
414
+ "grad_norm": 5.259005069732666,
415
+ "learning_rate": 9.395714285714287e-06,
416
+ "loss": 0.29,
417
+ "step": 1425
418
+ },
419
+ {
420
+ "epoch": 0.6172839506172839,
421
+ "grad_norm": 4.7707014083862305,
422
+ "learning_rate": 9.360000000000002e-06,
423
+ "loss": 0.2879,
424
+ "step": 1450
425
+ },
426
+ {
427
+ "epoch": 0.6279267773520647,
428
+ "grad_norm": 4.998105525970459,
429
+ "learning_rate": 9.324285714285714e-06,
430
+ "loss": 0.2957,
431
+ "step": 1475
432
+ },
433
+ {
434
+ "epoch": 0.6385696040868455,
435
+ "grad_norm": 4.137457370758057,
436
+ "learning_rate": 9.28857142857143e-06,
437
+ "loss": 0.2897,
438
+ "step": 1500
439
+ },
440
+ {
441
+ "epoch": 0.6492124308216263,
442
+ "grad_norm": 4.685913562774658,
443
+ "learning_rate": 9.252857142857143e-06,
444
+ "loss": 0.3282,
445
+ "step": 1525
446
+ },
447
+ {
448
+ "epoch": 0.659855257556407,
449
+ "grad_norm": 5.36374044418335,
450
+ "learning_rate": 9.217142857142858e-06,
451
+ "loss": 0.2747,
452
+ "step": 1550
453
+ },
454
+ {
455
+ "epoch": 0.6704980842911877,
456
+ "grad_norm": 4.616824150085449,
457
+ "learning_rate": 9.181428571428572e-06,
458
+ "loss": 0.285,
459
+ "step": 1575
460
+ },
461
+ {
462
+ "epoch": 0.6811409110259685,
463
+ "grad_norm": 4.848719120025635,
464
+ "learning_rate": 9.145714285714287e-06,
465
+ "loss": 0.2971,
466
+ "step": 1600
467
+ },
468
+ {
469
+ "epoch": 0.6917837377607493,
470
+ "grad_norm": 4.435796737670898,
471
+ "learning_rate": 9.110000000000001e-06,
472
+ "loss": 0.2993,
473
+ "step": 1625
474
+ },
475
+ {
476
+ "epoch": 0.70242656449553,
477
+ "grad_norm": 4.055502414703369,
478
+ "learning_rate": 9.074285714285716e-06,
479
+ "loss": 0.2713,
480
+ "step": 1650
481
+ },
482
+ {
483
+ "epoch": 0.7130693912303108,
484
+ "grad_norm": 5.476015090942383,
485
+ "learning_rate": 9.038571428571428e-06,
486
+ "loss": 0.2553,
487
+ "step": 1675
488
+ },
489
+ {
490
+ "epoch": 0.7237122179650916,
491
+ "grad_norm": 4.443753242492676,
492
+ "learning_rate": 9.002857142857144e-06,
493
+ "loss": 0.2772,
494
+ "step": 1700
495
+ },
496
+ {
497
+ "epoch": 0.7343550446998723,
498
+ "grad_norm": 4.617072105407715,
499
+ "learning_rate": 8.967142857142857e-06,
500
+ "loss": 0.2745,
501
+ "step": 1725
502
+ },
503
+ {
504
+ "epoch": 0.744997871434653,
505
+ "grad_norm": 4.322467803955078,
506
+ "learning_rate": 8.931428571428573e-06,
507
+ "loss": 0.2756,
508
+ "step": 1750
509
+ },
510
+ {
511
+ "epoch": 0.7556406981694338,
512
+ "grad_norm": 5.194156169891357,
513
+ "learning_rate": 8.895714285714286e-06,
514
+ "loss": 0.2571,
515
+ "step": 1775
516
+ },
517
+ {
518
+ "epoch": 0.7662835249042146,
519
+ "grad_norm": 5.350680828094482,
520
+ "learning_rate": 8.860000000000002e-06,
521
+ "loss": 0.2705,
522
+ "step": 1800
523
+ },
524
+ {
525
+ "epoch": 0.7769263516389954,
526
+ "grad_norm": 5.343641757965088,
527
+ "learning_rate": 8.824285714285715e-06,
528
+ "loss": 0.2499,
529
+ "step": 1825
530
+ },
531
+ {
532
+ "epoch": 0.787569178373776,
533
+ "grad_norm": 4.356059551239014,
534
+ "learning_rate": 8.788571428571429e-06,
535
+ "loss": 0.2767,
536
+ "step": 1850
537
+ },
538
+ {
539
+ "epoch": 0.7982120051085568,
540
+ "grad_norm": 4.316229820251465,
541
+ "learning_rate": 8.752857142857144e-06,
542
+ "loss": 0.2484,
543
+ "step": 1875
544
+ },
545
+ {
546
+ "epoch": 0.8088548318433376,
547
+ "grad_norm": 4.627383232116699,
548
+ "learning_rate": 8.717142857142858e-06,
549
+ "loss": 0.2559,
550
+ "step": 1900
551
+ },
552
+ {
553
+ "epoch": 0.8194976585781183,
554
+ "grad_norm": 4.916121006011963,
555
+ "learning_rate": 8.681428571428572e-06,
556
+ "loss": 0.2755,
557
+ "step": 1925
558
+ },
559
+ {
560
+ "epoch": 0.8301404853128991,
561
+ "grad_norm": 5.244263172149658,
562
+ "learning_rate": 8.645714285714287e-06,
563
+ "loss": 0.2334,
564
+ "step": 1950
565
+ },
566
+ {
567
+ "epoch": 0.8407833120476799,
568
+ "grad_norm": 4.568859100341797,
569
+ "learning_rate": 8.61e-06,
570
+ "loss": 0.2542,
571
+ "step": 1975
572
+ },
573
+ {
574
+ "epoch": 0.8514261387824607,
575
+ "grad_norm": 3.6536848545074463,
576
+ "learning_rate": 8.574285714285714e-06,
577
+ "loss": 0.251,
578
+ "step": 2000
579
+ },
580
+ {
581
+ "epoch": 0.8514261387824607,
582
+ "eval_loss": 0.21811740100383759,
583
+ "eval_runtime": 2451.0584,
584
+ "eval_samples_per_second": 1.949,
585
+ "eval_steps_per_second": 0.061,
586
+ "eval_wer": 27.706481799916737,
587
+ "step": 2000
588
+ },
589
+ {
590
+ "epoch": 0.8620689655172413,
591
+ "grad_norm": 4.318572521209717,
592
+ "learning_rate": 8.53857142857143e-06,
593
+ "loss": 0.2319,
594
+ "step": 2025
595
+ },
596
+ {
597
+ "epoch": 0.8727117922520221,
598
+ "grad_norm": 4.489058494567871,
599
+ "learning_rate": 8.502857142857143e-06,
600
+ "loss": 0.2531,
601
+ "step": 2050
602
+ },
603
+ {
604
+ "epoch": 0.8833546189868029,
605
+ "grad_norm": 4.232712745666504,
606
+ "learning_rate": 8.467142857142859e-06,
607
+ "loss": 0.2491,
608
+ "step": 2075
609
+ },
610
+ {
611
+ "epoch": 0.8939974457215837,
612
+ "grad_norm": 4.031393051147461,
613
+ "learning_rate": 8.431428571428572e-06,
614
+ "loss": 0.2485,
615
+ "step": 2100
616
+ },
617
+ {
618
+ "epoch": 0.9046402724563644,
619
+ "grad_norm": 3.8136720657348633,
620
+ "learning_rate": 8.395714285714286e-06,
621
+ "loss": 0.2412,
622
+ "step": 2125
623
+ },
624
+ {
625
+ "epoch": 0.9152830991911451,
626
+ "grad_norm": 4.3343505859375,
627
+ "learning_rate": 8.36e-06,
628
+ "loss": 0.2378,
629
+ "step": 2150
630
+ },
631
+ {
632
+ "epoch": 0.9259259259259259,
633
+ "grad_norm": 3.6388914585113525,
634
+ "learning_rate": 8.324285714285715e-06,
635
+ "loss": 0.2409,
636
+ "step": 2175
637
+ },
638
+ {
639
+ "epoch": 0.9365687526607067,
640
+ "grad_norm": 5.596227169036865,
641
+ "learning_rate": 8.288571428571429e-06,
642
+ "loss": 0.2363,
643
+ "step": 2200
644
+ },
645
+ {
646
+ "epoch": 0.9472115793954874,
647
+ "grad_norm": 4.016772747039795,
648
+ "learning_rate": 8.252857142857144e-06,
649
+ "loss": 0.2281,
650
+ "step": 2225
651
+ },
652
+ {
653
+ "epoch": 0.9578544061302682,
654
+ "grad_norm": 5.106402397155762,
655
+ "learning_rate": 8.217142857142858e-06,
656
+ "loss": 0.224,
657
+ "step": 2250
658
+ },
659
+ {
660
+ "epoch": 0.968497232865049,
661
+ "grad_norm": 3.714061975479126,
662
+ "learning_rate": 8.181428571428573e-06,
663
+ "loss": 0.2306,
664
+ "step": 2275
665
+ },
666
+ {
667
+ "epoch": 0.9791400595998297,
668
+ "grad_norm": 4.1780009269714355,
669
+ "learning_rate": 8.145714285714287e-06,
670
+ "loss": 0.2284,
671
+ "step": 2300
672
+ },
673
+ {
674
+ "epoch": 0.9897828863346104,
675
+ "grad_norm": 4.007058143615723,
676
+ "learning_rate": 8.110000000000002e-06,
677
+ "loss": 0.237,
678
+ "step": 2325
679
+ },
680
+ {
681
+ "epoch": 1.0004257130693912,
682
+ "grad_norm": 2.91274094581604,
683
+ "learning_rate": 8.074285714285714e-06,
684
+ "loss": 0.227,
685
+ "step": 2350
686
+ },
687
+ {
688
+ "epoch": 1.0110685398041719,
689
+ "grad_norm": 4.321012496948242,
690
+ "learning_rate": 8.03857142857143e-06,
691
+ "loss": 0.1917,
692
+ "step": 2375
693
+ },
694
+ {
695
+ "epoch": 1.0217113665389528,
696
+ "grad_norm": 4.613705635070801,
697
+ "learning_rate": 8.002857142857143e-06,
698
+ "loss": 0.1861,
699
+ "step": 2400
700
+ },
701
+ {
702
+ "epoch": 1.0323541932737335,
703
+ "grad_norm": 3.9575023651123047,
704
+ "learning_rate": 7.967142857142858e-06,
705
+ "loss": 0.1931,
706
+ "step": 2425
707
+ },
708
+ {
709
+ "epoch": 1.0429970200085144,
710
+ "grad_norm": 4.651571273803711,
711
+ "learning_rate": 7.931428571428572e-06,
712
+ "loss": 0.1784,
713
+ "step": 2450
714
+ },
715
+ {
716
+ "epoch": 1.053639846743295,
717
+ "grad_norm": 4.0472412109375,
718
+ "learning_rate": 7.895714285714287e-06,
719
+ "loss": 0.2002,
720
+ "step": 2475
721
+ },
722
+ {
723
+ "epoch": 1.0642826734780757,
724
+ "grad_norm": 3.4641237258911133,
725
+ "learning_rate": 7.860000000000001e-06,
726
+ "loss": 0.1885,
727
+ "step": 2500
728
+ },
729
+ {
730
+ "epoch": 1.0749255002128566,
731
+ "grad_norm": 3.260540008544922,
732
+ "learning_rate": 7.824285714285715e-06,
733
+ "loss": 0.1924,
734
+ "step": 2525
735
+ },
736
+ {
737
+ "epoch": 1.0855683269476373,
738
+ "grad_norm": 4.416691303253174,
739
+ "learning_rate": 7.788571428571428e-06,
740
+ "loss": 0.181,
741
+ "step": 2550
742
+ },
743
+ {
744
+ "epoch": 1.096211153682418,
745
+ "grad_norm": 3.7334911823272705,
746
+ "learning_rate": 7.752857142857144e-06,
747
+ "loss": 0.1667,
748
+ "step": 2575
749
+ },
750
+ {
751
+ "epoch": 1.1068539804171988,
752
+ "grad_norm": 4.4988555908203125,
753
+ "learning_rate": 7.717142857142857e-06,
754
+ "loss": 0.1864,
755
+ "step": 2600
756
+ },
757
+ {
758
+ "epoch": 1.1174968071519795,
759
+ "grad_norm": 4.6382222175598145,
760
+ "learning_rate": 7.681428571428573e-06,
761
+ "loss": 0.1805,
762
+ "step": 2625
763
+ },
764
+ {
765
+ "epoch": 1.1281396338867604,
766
+ "grad_norm": 4.512842178344727,
767
+ "learning_rate": 7.645714285714286e-06,
768
+ "loss": 0.1848,
769
+ "step": 2650
770
+ },
771
+ {
772
+ "epoch": 1.138782460621541,
773
+ "grad_norm": 3.889390468597412,
774
+ "learning_rate": 7.610000000000001e-06,
775
+ "loss": 0.1846,
776
+ "step": 2675
777
+ },
778
+ {
779
+ "epoch": 1.1494252873563218,
780
+ "grad_norm": 4.247312068939209,
781
+ "learning_rate": 7.574285714285715e-06,
782
+ "loss": 0.1799,
783
+ "step": 2700
784
+ },
785
+ {
786
+ "epoch": 1.1600681140911027,
787
+ "grad_norm": 4.321536540985107,
788
+ "learning_rate": 7.53857142857143e-06,
789
+ "loss": 0.1764,
790
+ "step": 2725
791
+ },
792
+ {
793
+ "epoch": 1.1707109408258833,
794
+ "grad_norm": 4.06414794921875,
795
+ "learning_rate": 7.502857142857144e-06,
796
+ "loss": 0.1903,
797
+ "step": 2750
798
+ },
799
+ {
800
+ "epoch": 1.181353767560664,
801
+ "grad_norm": 3.314551591873169,
802
+ "learning_rate": 7.467142857142857e-06,
803
+ "loss": 0.1614,
804
+ "step": 2775
805
+ },
806
+ {
807
+ "epoch": 1.191996594295445,
808
+ "grad_norm": 4.245212078094482,
809
+ "learning_rate": 7.431428571428572e-06,
810
+ "loss": 0.1867,
811
+ "step": 2800
812
+ },
813
+ {
814
+ "epoch": 1.2026394210302256,
815
+ "grad_norm": 3.1465117931365967,
816
+ "learning_rate": 7.395714285714286e-06,
817
+ "loss": 0.1584,
818
+ "step": 2825
819
+ },
820
+ {
821
+ "epoch": 1.2132822477650063,
822
+ "grad_norm": 4.1284637451171875,
823
+ "learning_rate": 7.360000000000001e-06,
824
+ "loss": 0.1856,
825
+ "step": 2850
826
+ },
827
+ {
828
+ "epoch": 1.2239250744997872,
829
+ "grad_norm": 3.685889720916748,
830
+ "learning_rate": 7.324285714285715e-06,
831
+ "loss": 0.171,
832
+ "step": 2875
833
+ },
834
+ {
835
+ "epoch": 1.2345679012345678,
836
+ "grad_norm": 4.70512580871582,
837
+ "learning_rate": 7.28857142857143e-06,
838
+ "loss": 0.1752,
839
+ "step": 2900
840
+ },
841
+ {
842
+ "epoch": 1.2452107279693487,
843
+ "grad_norm": 3.846862316131592,
844
+ "learning_rate": 7.252857142857143e-06,
845
+ "loss": 0.1697,
846
+ "step": 2925
847
+ },
848
+ {
849
+ "epoch": 1.2558535547041294,
850
+ "grad_norm": 3.7466206550598145,
851
+ "learning_rate": 7.217142857142858e-06,
852
+ "loss": 0.1796,
853
+ "step": 2950
854
+ },
855
+ {
856
+ "epoch": 1.2664963814389103,
857
+ "grad_norm": 3.8162903785705566,
858
+ "learning_rate": 7.182857142857144e-06,
859
+ "loss": 0.1591,
860
+ "step": 2975
861
+ },
862
+ {
863
+ "epoch": 1.277139208173691,
864
+ "grad_norm": 3.880910873413086,
865
+ "learning_rate": 7.147142857142858e-06,
866
+ "loss": 0.1569,
867
+ "step": 3000
868
+ },
869
+ {
870
+ "epoch": 1.277139208173691,
871
+ "eval_loss": 0.1813717633485794,
872
+ "eval_runtime": 2447.146,
873
+ "eval_samples_per_second": 1.952,
874
+ "eval_steps_per_second": 0.061,
875
+ "eval_wer": 24.153347693087408,
876
+ "step": 3000
877
+ },
878
+ {
879
+ "epoch": 1.2877820349084717,
880
+ "grad_norm": 3.98262882232666,
881
+ "learning_rate": 7.111428571428572e-06,
882
+ "loss": 0.1804,
883
+ "step": 3025
884
+ },
885
+ {
886
+ "epoch": 1.2984248616432525,
887
+ "grad_norm": 3.6790521144866943,
888
+ "learning_rate": 7.075714285714286e-06,
889
+ "loss": 0.1647,
890
+ "step": 3050
891
+ },
892
+ {
893
+ "epoch": 1.3090676883780332,
894
+ "grad_norm": 3.431762456893921,
895
+ "learning_rate": 7.04e-06,
896
+ "loss": 0.1662,
897
+ "step": 3075
898
+ },
899
+ {
900
+ "epoch": 1.319710515112814,
901
+ "grad_norm": 4.0635247230529785,
902
+ "learning_rate": 7.004285714285715e-06,
903
+ "loss": 0.1726,
904
+ "step": 3100
905
+ },
906
+ {
907
+ "epoch": 1.3303533418475948,
908
+ "grad_norm": 3.1607766151428223,
909
+ "learning_rate": 6.968571428571429e-06,
910
+ "loss": 0.1544,
911
+ "step": 3125
912
+ },
913
+ {
914
+ "epoch": 1.3409961685823755,
915
+ "grad_norm": 4.5737385749816895,
916
+ "learning_rate": 6.932857142857143e-06,
917
+ "loss": 0.1644,
918
+ "step": 3150
919
+ },
920
+ {
921
+ "epoch": 1.3516389953171561,
922
+ "grad_norm": 4.182763576507568,
923
+ "learning_rate": 6.8971428571428575e-06,
924
+ "loss": 0.167,
925
+ "step": 3175
926
+ },
927
+ {
928
+ "epoch": 1.362281822051937,
929
+ "grad_norm": 3.3566439151763916,
930
+ "learning_rate": 6.861428571428572e-06,
931
+ "loss": 0.1631,
932
+ "step": 3200
933
+ },
934
+ {
935
+ "epoch": 1.3729246487867177,
936
+ "grad_norm": 3.771667718887329,
937
+ "learning_rate": 6.8257142857142866e-06,
938
+ "loss": 0.1675,
939
+ "step": 3225
940
+ },
941
+ {
942
+ "epoch": 1.3835674755214984,
943
+ "grad_norm": 4.14226770401001,
944
+ "learning_rate": 6.790000000000001e-06,
945
+ "loss": 0.1785,
946
+ "step": 3250
947
+ },
948
+ {
949
+ "epoch": 1.3942103022562793,
950
+ "grad_norm": 4.599484443664551,
951
+ "learning_rate": 6.754285714285715e-06,
952
+ "loss": 0.1832,
953
+ "step": 3275
954
+ },
955
+ {
956
+ "epoch": 1.40485312899106,
957
+ "grad_norm": 3.78108286857605,
958
+ "learning_rate": 6.718571428571428e-06,
959
+ "loss": 0.1765,
960
+ "step": 3300
961
+ },
962
+ {
963
+ "epoch": 1.4154959557258409,
964
+ "grad_norm": 3.3249051570892334,
965
+ "learning_rate": 6.682857142857143e-06,
966
+ "loss": 0.1517,
967
+ "step": 3325
968
+ },
969
+ {
970
+ "epoch": 1.4261387824606215,
971
+ "grad_norm": 3.299750804901123,
972
+ "learning_rate": 6.647142857142857e-06,
973
+ "loss": 0.1632,
974
+ "step": 3350
975
+ },
976
+ {
977
+ "epoch": 1.4367816091954024,
978
+ "grad_norm": 4.0860066413879395,
979
+ "learning_rate": 6.611428571428572e-06,
980
+ "loss": 0.1457,
981
+ "step": 3375
982
+ },
983
+ {
984
+ "epoch": 1.447424435930183,
985
+ "grad_norm": 4.305485725402832,
986
+ "learning_rate": 6.575714285714286e-06,
987
+ "loss": 0.1638,
988
+ "step": 3400
989
+ },
990
+ {
991
+ "epoch": 1.4580672626649638,
992
+ "grad_norm": 3.656642436981201,
993
+ "learning_rate": 6.540000000000001e-06,
994
+ "loss": 0.1681,
995
+ "step": 3425
996
+ },
997
+ {
998
+ "epoch": 1.4687100893997447,
999
+ "grad_norm": 3.596554756164551,
1000
+ "learning_rate": 6.504285714285715e-06,
1001
+ "loss": 0.1473,
1002
+ "step": 3450
1003
+ },
1004
+ {
1005
+ "epoch": 1.4793529161345254,
1006
+ "grad_norm": 3.35798716545105,
1007
+ "learning_rate": 6.46857142857143e-06,
1008
+ "loss": 0.1443,
1009
+ "step": 3475
1010
+ },
1011
+ {
1012
+ "epoch": 1.489995742869306,
1013
+ "grad_norm": 3.782789468765259,
1014
+ "learning_rate": 6.432857142857143e-06,
1015
+ "loss": 0.1399,
1016
+ "step": 3500
1017
+ },
1018
+ {
1019
+ "epoch": 1.500638569604087,
1020
+ "grad_norm": 3.556546926498413,
1021
+ "learning_rate": 6.397142857142857e-06,
1022
+ "loss": 0.1657,
1023
+ "step": 3525
1024
+ },
1025
+ {
1026
+ "epoch": 1.5112813963388676,
1027
+ "grad_norm": 4.0330657958984375,
1028
+ "learning_rate": 6.361428571428572e-06,
1029
+ "loss": 0.1455,
1030
+ "step": 3550
1031
+ },
1032
+ {
1033
+ "epoch": 1.5219242230736483,
1034
+ "grad_norm": 3.4194424152374268,
1035
+ "learning_rate": 6.325714285714286e-06,
1036
+ "loss": 0.1558,
1037
+ "step": 3575
1038
+ },
1039
+ {
1040
+ "epoch": 1.5325670498084292,
1041
+ "grad_norm": 3.4053897857666016,
1042
+ "learning_rate": 6.290000000000001e-06,
1043
+ "loss": 0.1667,
1044
+ "step": 3600
1045
+ },
1046
+ {
1047
+ "epoch": 1.5432098765432098,
1048
+ "grad_norm": 3.4398772716522217,
1049
+ "learning_rate": 6.254285714285715e-06,
1050
+ "loss": 0.1704,
1051
+ "step": 3625
1052
+ },
1053
+ {
1054
+ "epoch": 1.5538527032779905,
1055
+ "grad_norm": 3.950698137283325,
1056
+ "learning_rate": 6.21857142857143e-06,
1057
+ "loss": 0.1587,
1058
+ "step": 3650
1059
+ },
1060
+ {
1061
+ "epoch": 1.5644955300127714,
1062
+ "grad_norm": 3.5105514526367188,
1063
+ "learning_rate": 6.1828571428571434e-06,
1064
+ "loss": 0.1662,
1065
+ "step": 3675
1066
+ },
1067
+ {
1068
+ "epoch": 1.5751383567475523,
1069
+ "grad_norm": 3.1570792198181152,
1070
+ "learning_rate": 6.147142857142858e-06,
1071
+ "loss": 0.1542,
1072
+ "step": 3700
1073
+ },
1074
+ {
1075
+ "epoch": 1.5857811834823328,
1076
+ "grad_norm": 3.395730495452881,
1077
+ "learning_rate": 6.111428571428572e-06,
1078
+ "loss": 0.1419,
1079
+ "step": 3725
1080
+ },
1081
+ {
1082
+ "epoch": 1.5964240102171137,
1083
+ "grad_norm": 3.692760944366455,
1084
+ "learning_rate": 6.075714285714286e-06,
1085
+ "loss": 0.1515,
1086
+ "step": 3750
1087
+ },
1088
+ {
1089
+ "epoch": 1.6070668369518946,
1090
+ "grad_norm": 4.292817115783691,
1091
+ "learning_rate": 6.040000000000001e-06,
1092
+ "loss": 0.1558,
1093
+ "step": 3775
1094
+ },
1095
+ {
1096
+ "epoch": 1.617709663686675,
1097
+ "grad_norm": 2.7795393466949463,
1098
+ "learning_rate": 6.004285714285715e-06,
1099
+ "loss": 0.1603,
1100
+ "step": 3800
1101
+ },
1102
+ {
1103
+ "epoch": 1.628352490421456,
1104
+ "grad_norm": 3.6494193077087402,
1105
+ "learning_rate": 5.968571428571429e-06,
1106
+ "loss": 0.1527,
1107
+ "step": 3825
1108
+ },
1109
+ {
1110
+ "epoch": 1.6389953171562368,
1111
+ "grad_norm": 3.185007333755493,
1112
+ "learning_rate": 5.932857142857143e-06,
1113
+ "loss": 0.1415,
1114
+ "step": 3850
1115
+ },
1116
+ {
1117
+ "epoch": 1.6496381438910175,
1118
+ "grad_norm": 4.0278143882751465,
1119
+ "learning_rate": 5.897142857142858e-06,
1120
+ "loss": 0.1595,
1121
+ "step": 3875
1122
+ },
1123
+ {
1124
+ "epoch": 1.6602809706257982,
1125
+ "grad_norm": 3.8083670139312744,
1126
+ "learning_rate": 5.861428571428572e-06,
1127
+ "loss": 0.1596,
1128
+ "step": 3900
1129
+ },
1130
+ {
1131
+ "epoch": 1.670923797360579,
1132
+ "grad_norm": 5.412234783172607,
1133
+ "learning_rate": 5.825714285714286e-06,
1134
+ "loss": 0.1418,
1135
+ "step": 3925
1136
+ },
1137
+ {
1138
+ "epoch": 1.6815666240953597,
1139
+ "grad_norm": 3.8275325298309326,
1140
+ "learning_rate": 5.7900000000000005e-06,
1141
+ "loss": 0.1725,
1142
+ "step": 3950
1143
+ },
1144
+ {
1145
+ "epoch": 1.6922094508301404,
1146
+ "grad_norm": 3.4874017238616943,
1147
+ "learning_rate": 5.754285714285714e-06,
1148
+ "loss": 0.1334,
1149
+ "step": 3975
1150
+ },
1151
+ {
1152
+ "epoch": 1.7028522775649213,
1153
+ "grad_norm": 2.9034647941589355,
1154
+ "learning_rate": 5.718571428571429e-06,
1155
+ "loss": 0.1436,
1156
+ "step": 4000
1157
+ },
1158
+ {
1159
+ "epoch": 1.7028522775649213,
1160
+ "eval_loss": 0.1530725359916687,
1161
+ "eval_runtime": 2470.8785,
1162
+ "eval_samples_per_second": 1.933,
1163
+ "eval_steps_per_second": 0.061,
1164
+ "eval_wer": 20.381197169077055,
1165
+ "step": 4000
1166
+ },
1167
+ {
1168
+ "epoch": 1.713495104299702,
1169
+ "grad_norm": 3.192444086074829,
1170
+ "learning_rate": 5.682857142857143e-06,
1171
+ "loss": 0.1391,
1172
+ "step": 4025
1173
+ },
1174
+ {
1175
+ "epoch": 1.7241379310344827,
1176
+ "grad_norm": 3.376185655593872,
1177
+ "learning_rate": 5.647142857142858e-06,
1178
+ "loss": 0.1447,
1179
+ "step": 4050
1180
+ },
1181
+ {
1182
+ "epoch": 1.7347807577692635,
1183
+ "grad_norm": 3.2235193252563477,
1184
+ "learning_rate": 5.611428571428572e-06,
1185
+ "loss": 0.1473,
1186
+ "step": 4075
1187
+ },
1188
+ {
1189
+ "epoch": 1.7454235845040442,
1190
+ "grad_norm": 3.4376378059387207,
1191
+ "learning_rate": 5.575714285714287e-06,
1192
+ "loss": 0.1526,
1193
+ "step": 4100
1194
+ },
1195
+ {
1196
+ "epoch": 1.756066411238825,
1197
+ "grad_norm": 3.4150240421295166,
1198
+ "learning_rate": 5.540000000000001e-06,
1199
+ "loss": 0.1503,
1200
+ "step": 4125
1201
+ },
1202
+ {
1203
+ "epoch": 1.7667092379736058,
1204
+ "grad_norm": 3.757262706756592,
1205
+ "learning_rate": 5.504285714285714e-06,
1206
+ "loss": 0.1311,
1207
+ "step": 4150
1208
+ },
1209
+ {
1210
+ "epoch": 1.7773520647083867,
1211
+ "grad_norm": 3.725192070007324,
1212
+ "learning_rate": 5.4685714285714285e-06,
1213
+ "loss": 0.1506,
1214
+ "step": 4175
1215
+ },
1216
+ {
1217
+ "epoch": 1.7879948914431671,
1218
+ "grad_norm": 3.243486166000366,
1219
+ "learning_rate": 5.432857142857143e-06,
1220
+ "loss": 0.1529,
1221
+ "step": 4200
1222
+ },
1223
+ {
1224
+ "epoch": 1.798637718177948,
1225
+ "grad_norm": 3.1005189418792725,
1226
+ "learning_rate": 5.3971428571428575e-06,
1227
+ "loss": 0.1592,
1228
+ "step": 4225
1229
+ },
1230
+ {
1231
+ "epoch": 1.809280544912729,
1232
+ "grad_norm": 2.6923441886901855,
1233
+ "learning_rate": 5.361428571428572e-06,
1234
+ "loss": 0.1373,
1235
+ "step": 4250
1236
+ },
1237
+ {
1238
+ "epoch": 1.8199233716475096,
1239
+ "grad_norm": 3.4601283073425293,
1240
+ "learning_rate": 5.3257142857142865e-06,
1241
+ "loss": 0.1358,
1242
+ "step": 4275
1243
+ },
1244
+ {
1245
+ "epoch": 1.8305661983822903,
1246
+ "grad_norm": 4.46110200881958,
1247
+ "learning_rate": 5.290000000000001e-06,
1248
+ "loss": 0.1406,
1249
+ "step": 4300
1250
+ },
1251
+ {
1252
+ "epoch": 1.8412090251170712,
1253
+ "grad_norm": 3.4556360244750977,
1254
+ "learning_rate": 5.254285714285715e-06,
1255
+ "loss": 0.1314,
1256
+ "step": 4325
1257
+ },
1258
+ {
1259
+ "epoch": 1.8518518518518519,
1260
+ "grad_norm": 2.851836919784546,
1261
+ "learning_rate": 5.218571428571429e-06,
1262
+ "loss": 0.1263,
1263
+ "step": 4350
1264
+ },
1265
+ {
1266
+ "epoch": 1.8624946785866325,
1267
+ "grad_norm": 3.1507768630981445,
1268
+ "learning_rate": 5.182857142857143e-06,
1269
+ "loss": 0.1263,
1270
+ "step": 4375
1271
+ },
1272
+ {
1273
+ "epoch": 1.8731375053214134,
1274
+ "grad_norm": 3.7861220836639404,
1275
+ "learning_rate": 5.147142857142857e-06,
1276
+ "loss": 0.1423,
1277
+ "step": 4400
1278
+ },
1279
+ {
1280
+ "epoch": 1.883780332056194,
1281
+ "grad_norm": 2.670792818069458,
1282
+ "learning_rate": 5.111428571428572e-06,
1283
+ "loss": 0.1378,
1284
+ "step": 4425
1285
+ },
1286
+ {
1287
+ "epoch": 1.8944231587909748,
1288
+ "grad_norm": 3.21482515335083,
1289
+ "learning_rate": 5.075714285714286e-06,
1290
+ "loss": 0.1305,
1291
+ "step": 4450
1292
+ },
1293
+ {
1294
+ "epoch": 1.9050659855257557,
1295
+ "grad_norm": 3.0958456993103027,
1296
+ "learning_rate": 5.04e-06,
1297
+ "loss": 0.1312,
1298
+ "step": 4475
1299
+ },
1300
+ {
1301
+ "epoch": 1.9157088122605364,
1302
+ "grad_norm": 3.2010111808776855,
1303
+ "learning_rate": 5.0042857142857145e-06,
1304
+ "loss": 0.1358,
1305
+ "step": 4500
1306
+ },
1307
+ {
1308
+ "epoch": 1.926351638995317,
1309
+ "grad_norm": 4.211108684539795,
1310
+ "learning_rate": 4.968571428571429e-06,
1311
+ "loss": 0.149,
1312
+ "step": 4525
1313
+ },
1314
+ {
1315
+ "epoch": 1.936994465730098,
1316
+ "grad_norm": 3.6158218383789062,
1317
+ "learning_rate": 4.932857142857143e-06,
1318
+ "loss": 0.1456,
1319
+ "step": 4550
1320
+ },
1321
+ {
1322
+ "epoch": 1.9476372924648788,
1323
+ "grad_norm": 3.1304032802581787,
1324
+ "learning_rate": 4.897142857142857e-06,
1325
+ "loss": 0.1474,
1326
+ "step": 4575
1327
+ },
1328
+ {
1329
+ "epoch": 1.9582801191996593,
1330
+ "grad_norm": 3.7992565631866455,
1331
+ "learning_rate": 4.861428571428572e-06,
1332
+ "loss": 0.1252,
1333
+ "step": 4600
1334
+ },
1335
+ {
1336
+ "epoch": 1.9689229459344402,
1337
+ "grad_norm": 3.0859761238098145,
1338
+ "learning_rate": 4.825714285714286e-06,
1339
+ "loss": 0.1459,
1340
+ "step": 4625
1341
+ },
1342
+ {
1343
+ "epoch": 1.979565772669221,
1344
+ "grad_norm": 4.332040309906006,
1345
+ "learning_rate": 4.79e-06,
1346
+ "loss": 0.129,
1347
+ "step": 4650
1348
+ },
1349
+ {
1350
+ "epoch": 1.9902085994040017,
1351
+ "grad_norm": 4.2954816818237305,
1352
+ "learning_rate": 4.754285714285714e-06,
1353
+ "loss": 0.1566,
1354
+ "step": 4675
1355
+ },
1356
+ {
1357
+ "epoch": 2.0008514261387824,
1358
+ "grad_norm": 2.788947105407715,
1359
+ "learning_rate": 4.718571428571429e-06,
1360
+ "loss": 0.145,
1361
+ "step": 4700
1362
+ },
1363
+ {
1364
+ "epoch": 2.0114942528735633,
1365
+ "grad_norm": 3.2599875926971436,
1366
+ "learning_rate": 4.682857142857143e-06,
1367
+ "loss": 0.1063,
1368
+ "step": 4725
1369
+ },
1370
+ {
1371
+ "epoch": 2.0221370796083438,
1372
+ "grad_norm": 3.0225577354431152,
1373
+ "learning_rate": 4.647142857142857e-06,
1374
+ "loss": 0.0877,
1375
+ "step": 4750
1376
+ },
1377
+ {
1378
+ "epoch": 2.0327799063431247,
1379
+ "grad_norm": 3.564682960510254,
1380
+ "learning_rate": 4.6114285714285716e-06,
1381
+ "loss": 0.1014,
1382
+ "step": 4775
1383
+ },
1384
+ {
1385
+ "epoch": 2.0434227330779056,
1386
+ "grad_norm": 2.5339510440826416,
1387
+ "learning_rate": 4.575714285714286e-06,
1388
+ "loss": 0.0906,
1389
+ "step": 4800
1390
+ },
1391
+ {
1392
+ "epoch": 2.0540655598126865,
1393
+ "grad_norm": 2.7343597412109375,
1394
+ "learning_rate": 4.540000000000001e-06,
1395
+ "loss": 0.0994,
1396
+ "step": 4825
1397
+ },
1398
+ {
1399
+ "epoch": 2.064708386547467,
1400
+ "grad_norm": 2.6490981578826904,
1401
+ "learning_rate": 4.504285714285715e-06,
1402
+ "loss": 0.0979,
1403
+ "step": 4850
1404
+ },
1405
+ {
1406
+ "epoch": 2.075351213282248,
1407
+ "grad_norm": 1.9775068759918213,
1408
+ "learning_rate": 4.468571428571429e-06,
1409
+ "loss": 0.1023,
1410
+ "step": 4875
1411
+ },
1412
+ {
1413
+ "epoch": 2.0859940400170287,
1414
+ "grad_norm": 2.2302167415618896,
1415
+ "learning_rate": 4.432857142857143e-06,
1416
+ "loss": 0.101,
1417
+ "step": 4900
1418
+ },
1419
+ {
1420
+ "epoch": 2.096636866751809,
1421
+ "grad_norm": 2.7685494422912598,
1422
+ "learning_rate": 4.397142857142858e-06,
1423
+ "loss": 0.0934,
1424
+ "step": 4925
1425
+ },
1426
+ {
1427
+ "epoch": 2.10727969348659,
1428
+ "grad_norm": 2.8027827739715576,
1429
+ "learning_rate": 4.361428571428572e-06,
1430
+ "loss": 0.0962,
1431
+ "step": 4950
1432
+ },
1433
+ {
1434
+ "epoch": 2.117922520221371,
1435
+ "grad_norm": 2.9173505306243896,
1436
+ "learning_rate": 4.325714285714286e-06,
1437
+ "loss": 0.0934,
1438
+ "step": 4975
1439
+ },
1440
+ {
1441
+ "epoch": 2.1285653469561514,
1442
+ "grad_norm": 2.7315633296966553,
1443
+ "learning_rate": 4.2900000000000004e-06,
1444
+ "loss": 0.0931,
1445
+ "step": 5000
1446
+ },
1447
+ {
1448
+ "epoch": 2.1285653469561514,
1449
+ "eval_loss": 0.13744878768920898,
1450
+ "eval_runtime": 2441.1543,
1451
+ "eval_samples_per_second": 1.957,
1452
+ "eval_steps_per_second": 0.061,
1453
+ "eval_wer": 18.466161058519013,
1454
+ "step": 5000
1455
+ },
1456
+ {
1457
+ "epoch": 2.1392081736909323,
1458
+ "grad_norm": 2.411224126815796,
1459
+ "learning_rate": 4.254285714285715e-06,
1460
+ "loss": 0.1058,
1461
+ "step": 5025
1462
+ },
1463
+ {
1464
+ "epoch": 2.149851000425713,
1465
+ "grad_norm": 2.7599411010742188,
1466
+ "learning_rate": 4.2185714285714294e-06,
1467
+ "loss": 0.105,
1468
+ "step": 5050
1469
+ },
1470
+ {
1471
+ "epoch": 2.1604938271604937,
1472
+ "grad_norm": 2.873077392578125,
1473
+ "learning_rate": 4.182857142857143e-06,
1474
+ "loss": 0.1122,
1475
+ "step": 5075
1476
+ },
1477
+ {
1478
+ "epoch": 2.1711366538952745,
1479
+ "grad_norm": 2.4859185218811035,
1480
+ "learning_rate": 4.147142857142858e-06,
1481
+ "loss": 0.0956,
1482
+ "step": 5100
1483
+ },
1484
+ {
1485
+ "epoch": 2.1817794806300554,
1486
+ "grad_norm": 2.307053565979004,
1487
+ "learning_rate": 4.111428571428572e-06,
1488
+ "loss": 0.0936,
1489
+ "step": 5125
1490
+ },
1491
+ {
1492
+ "epoch": 2.192422307364836,
1493
+ "grad_norm": 2.692552328109741,
1494
+ "learning_rate": 4.075714285714286e-06,
1495
+ "loss": 0.0814,
1496
+ "step": 5150
1497
+ },
1498
+ {
1499
+ "epoch": 2.203065134099617,
1500
+ "grad_norm": 2.640380382537842,
1501
+ "learning_rate": 4.04e-06,
1502
+ "loss": 0.0961,
1503
+ "step": 5175
1504
+ },
1505
+ {
1506
+ "epoch": 2.2137079608343977,
1507
+ "grad_norm": 1.9715120792388916,
1508
+ "learning_rate": 4.004285714285715e-06,
1509
+ "loss": 0.0911,
1510
+ "step": 5200
1511
+ },
1512
+ {
1513
+ "epoch": 2.224350787569178,
1514
+ "grad_norm": 2.4855728149414062,
1515
+ "learning_rate": 3.9685714285714284e-06,
1516
+ "loss": 0.0871,
1517
+ "step": 5225
1518
+ },
1519
+ {
1520
+ "epoch": 2.234993614303959,
1521
+ "grad_norm": 2.190443992614746,
1522
+ "learning_rate": 3.932857142857143e-06,
1523
+ "loss": 0.0923,
1524
+ "step": 5250
1525
+ },
1526
+ {
1527
+ "epoch": 2.24563644103874,
1528
+ "grad_norm": 2.5768940448760986,
1529
+ "learning_rate": 3.8971428571428575e-06,
1530
+ "loss": 0.1033,
1531
+ "step": 5275
1532
+ },
1533
+ {
1534
+ "epoch": 2.256279267773521,
1535
+ "grad_norm": 2.527087926864624,
1536
+ "learning_rate": 3.861428571428571e-06,
1537
+ "loss": 0.081,
1538
+ "step": 5300
1539
+ },
1540
+ {
1541
+ "epoch": 2.2669220945083013,
1542
+ "grad_norm": 3.3411247730255127,
1543
+ "learning_rate": 3.825714285714286e-06,
1544
+ "loss": 0.0931,
1545
+ "step": 5325
1546
+ },
1547
+ {
1548
+ "epoch": 2.277564921243082,
1549
+ "grad_norm": 2.852933645248413,
1550
+ "learning_rate": 3.79e-06,
1551
+ "loss": 0.0736,
1552
+ "step": 5350
1553
+ },
1554
+ {
1555
+ "epoch": 2.288207747977863,
1556
+ "grad_norm": 3.596585512161255,
1557
+ "learning_rate": 3.7542857142857146e-06,
1558
+ "loss": 0.0892,
1559
+ "step": 5375
1560
+ },
1561
+ {
1562
+ "epoch": 2.2988505747126435,
1563
+ "grad_norm": 3.5326387882232666,
1564
+ "learning_rate": 3.7185714285714287e-06,
1565
+ "loss": 0.0827,
1566
+ "step": 5400
1567
+ },
1568
+ {
1569
+ "epoch": 2.3094934014474244,
1570
+ "grad_norm": 2.5857245922088623,
1571
+ "learning_rate": 3.682857142857143e-06,
1572
+ "loss": 0.0957,
1573
+ "step": 5425
1574
+ },
1575
+ {
1576
+ "epoch": 2.3201362281822053,
1577
+ "grad_norm": 2.7961575984954834,
1578
+ "learning_rate": 3.6471428571428573e-06,
1579
+ "loss": 0.0968,
1580
+ "step": 5450
1581
+ },
1582
+ {
1583
+ "epoch": 2.330779054916986,
1584
+ "grad_norm": 2.5830881595611572,
1585
+ "learning_rate": 3.611428571428572e-06,
1586
+ "loss": 0.086,
1587
+ "step": 5475
1588
+ },
1589
+ {
1590
+ "epoch": 2.3414218816517667,
1591
+ "grad_norm": 3.009079694747925,
1592
+ "learning_rate": 3.5757142857142863e-06,
1593
+ "loss": 0.0821,
1594
+ "step": 5500
1595
+ },
1596
+ {
1597
+ "epoch": 2.3520647083865476,
1598
+ "grad_norm": 3.2206666469573975,
1599
+ "learning_rate": 3.54e-06,
1600
+ "loss": 0.1,
1601
+ "step": 5525
1602
+ },
1603
+ {
1604
+ "epoch": 2.362707535121328,
1605
+ "grad_norm": 2.6536972522735596,
1606
+ "learning_rate": 3.5042857142857145e-06,
1607
+ "loss": 0.0911,
1608
+ "step": 5550
1609
+ },
1610
+ {
1611
+ "epoch": 2.373350361856109,
1612
+ "grad_norm": 2.0286781787872314,
1613
+ "learning_rate": 3.468571428571429e-06,
1614
+ "loss": 0.083,
1615
+ "step": 5575
1616
+ },
1617
+ {
1618
+ "epoch": 2.38399318859089,
1619
+ "grad_norm": 3.5354936122894287,
1620
+ "learning_rate": 3.4328571428571435e-06,
1621
+ "loss": 0.0994,
1622
+ "step": 5600
1623
+ },
1624
+ {
1625
+ "epoch": 2.3946360153256707,
1626
+ "grad_norm": 2.823812246322632,
1627
+ "learning_rate": 3.397142857142857e-06,
1628
+ "loss": 0.0921,
1629
+ "step": 5625
1630
+ },
1631
+ {
1632
+ "epoch": 2.405278842060451,
1633
+ "grad_norm": 3.5603067874908447,
1634
+ "learning_rate": 3.3614285714285717e-06,
1635
+ "loss": 0.1015,
1636
+ "step": 5650
1637
+ },
1638
+ {
1639
+ "epoch": 2.415921668795232,
1640
+ "grad_norm": 2.4219422340393066,
1641
+ "learning_rate": 3.325714285714286e-06,
1642
+ "loss": 0.098,
1643
+ "step": 5675
1644
+ },
1645
+ {
1646
+ "epoch": 2.4265644955300125,
1647
+ "grad_norm": 3.9650704860687256,
1648
+ "learning_rate": 3.2900000000000003e-06,
1649
+ "loss": 0.0914,
1650
+ "step": 5700
1651
+ },
1652
+ {
1653
+ "epoch": 2.4372073222647934,
1654
+ "grad_norm": 2.7661550045013428,
1655
+ "learning_rate": 3.2542857142857148e-06,
1656
+ "loss": 0.0733,
1657
+ "step": 5725
1658
+ },
1659
+ {
1660
+ "epoch": 2.4478501489995743,
1661
+ "grad_norm": 2.8396358489990234,
1662
+ "learning_rate": 3.218571428571429e-06,
1663
+ "loss": 0.0954,
1664
+ "step": 5750
1665
+ },
1666
+ {
1667
+ "epoch": 2.458492975734355,
1668
+ "grad_norm": 2.8353986740112305,
1669
+ "learning_rate": 3.182857142857143e-06,
1670
+ "loss": 0.0848,
1671
+ "step": 5775
1672
+ },
1673
+ {
1674
+ "epoch": 2.4691358024691357,
1675
+ "grad_norm": 2.9679837226867676,
1676
+ "learning_rate": 3.1471428571428574e-06,
1677
+ "loss": 0.084,
1678
+ "step": 5800
1679
+ },
1680
+ {
1681
+ "epoch": 2.4797786292039166,
1682
+ "grad_norm": 2.0554795265197754,
1683
+ "learning_rate": 3.111428571428572e-06,
1684
+ "loss": 0.0894,
1685
+ "step": 5825
1686
+ },
1687
+ {
1688
+ "epoch": 2.4904214559386975,
1689
+ "grad_norm": 2.5439860820770264,
1690
+ "learning_rate": 3.0757142857142856e-06,
1691
+ "loss": 0.0836,
1692
+ "step": 5850
1693
+ },
1694
+ {
1695
+ "epoch": 2.501064282673478,
1696
+ "grad_norm": 2.93955135345459,
1697
+ "learning_rate": 3.04e-06,
1698
+ "loss": 0.0915,
1699
+ "step": 5875
1700
+ },
1701
+ {
1702
+ "epoch": 2.511707109408259,
1703
+ "grad_norm": 2.3502097129821777,
1704
+ "learning_rate": 3.0042857142857146e-06,
1705
+ "loss": 0.0963,
1706
+ "step": 5900
1707
+ },
1708
+ {
1709
+ "epoch": 2.5223499361430397,
1710
+ "grad_norm": 2.289599895477295,
1711
+ "learning_rate": 2.968571428571429e-06,
1712
+ "loss": 0.0892,
1713
+ "step": 5925
1714
+ },
1715
+ {
1716
+ "epoch": 2.5329927628778206,
1717
+ "grad_norm": 4.718634128570557,
1718
+ "learning_rate": 2.932857142857143e-06,
1719
+ "loss": 0.089,
1720
+ "step": 5950
1721
+ },
1722
+ {
1723
+ "epoch": 2.543635589612601,
1724
+ "grad_norm": 2.9124553203582764,
1725
+ "learning_rate": 2.8971428571428573e-06,
1726
+ "loss": 0.0943,
1727
+ "step": 5975
1728
+ },
1729
+ {
1730
+ "epoch": 2.554278416347382,
1731
+ "grad_norm": 3.2406508922576904,
1732
+ "learning_rate": 2.861428571428572e-06,
1733
+ "loss": 0.0891,
1734
+ "step": 6000
1735
+ },
1736
+ {
1737
+ "epoch": 2.554278416347382,
1738
+ "eval_loss": 0.1251918077468872,
1739
+ "eval_runtime": 2435.0048,
1740
+ "eval_samples_per_second": 1.962,
1741
+ "eval_steps_per_second": 0.062,
1742
+ "eval_wer": 16.934856191286404,
1743
+ "step": 6000
1744
+ },
1745
+ {
1746
+ "epoch": 2.5649212430821624,
1747
+ "grad_norm": 2.5758533477783203,
1748
+ "learning_rate": 2.825714285714286e-06,
1749
+ "loss": 0.0909,
1750
+ "step": 6025
1751
+ },
1752
+ {
1753
+ "epoch": 2.5755640698169433,
1754
+ "grad_norm": 2.308535575866699,
1755
+ "learning_rate": 2.7900000000000004e-06,
1756
+ "loss": 0.0903,
1757
+ "step": 6050
1758
+ },
1759
+ {
1760
+ "epoch": 2.586206896551724,
1761
+ "grad_norm": 3.0140132904052734,
1762
+ "learning_rate": 2.7542857142857145e-06,
1763
+ "loss": 0.1005,
1764
+ "step": 6075
1765
+ },
1766
+ {
1767
+ "epoch": 2.596849723286505,
1768
+ "grad_norm": 3.0237767696380615,
1769
+ "learning_rate": 2.7185714285714286e-06,
1770
+ "loss": 0.1032,
1771
+ "step": 6100
1772
+ },
1773
+ {
1774
+ "epoch": 2.6074925500212855,
1775
+ "grad_norm": 2.413677930831909,
1776
+ "learning_rate": 2.682857142857143e-06,
1777
+ "loss": 0.0753,
1778
+ "step": 6125
1779
+ },
1780
+ {
1781
+ "epoch": 2.6181353767560664,
1782
+ "grad_norm": 2.406214475631714,
1783
+ "learning_rate": 2.6471428571428576e-06,
1784
+ "loss": 0.0744,
1785
+ "step": 6150
1786
+ },
1787
+ {
1788
+ "epoch": 2.628778203490847,
1789
+ "grad_norm": 2.9371650218963623,
1790
+ "learning_rate": 2.6114285714285712e-06,
1791
+ "loss": 0.0795,
1792
+ "step": 6175
1793
+ },
1794
+ {
1795
+ "epoch": 2.639421030225628,
1796
+ "grad_norm": 3.0647592544555664,
1797
+ "learning_rate": 2.5757142857142857e-06,
1798
+ "loss": 0.0885,
1799
+ "step": 6200
1800
+ },
1801
+ {
1802
+ "epoch": 2.6500638569604087,
1803
+ "grad_norm": 2.245195150375366,
1804
+ "learning_rate": 2.5400000000000002e-06,
1805
+ "loss": 0.0951,
1806
+ "step": 6225
1807
+ },
1808
+ {
1809
+ "epoch": 2.6607066836951896,
1810
+ "grad_norm": 3.212939977645874,
1811
+ "learning_rate": 2.5042857142857148e-06,
1812
+ "loss": 0.1081,
1813
+ "step": 6250
1814
+ },
1815
+ {
1816
+ "epoch": 2.67134951042997,
1817
+ "grad_norm": 2.987602949142456,
1818
+ "learning_rate": 2.468571428571429e-06,
1819
+ "loss": 0.0694,
1820
+ "step": 6275
1821
+ },
1822
+ {
1823
+ "epoch": 2.681992337164751,
1824
+ "grad_norm": 2.6746339797973633,
1825
+ "learning_rate": 2.4328571428571433e-06,
1826
+ "loss": 0.0879,
1827
+ "step": 6300
1828
+ },
1829
+ {
1830
+ "epoch": 2.692635163899532,
1831
+ "grad_norm": 2.3074121475219727,
1832
+ "learning_rate": 2.3971428571428574e-06,
1833
+ "loss": 0.0771,
1834
+ "step": 6325
1835
+ },
1836
+ {
1837
+ "epoch": 2.7032779906343123,
1838
+ "grad_norm": 2.62947940826416,
1839
+ "learning_rate": 2.361428571428572e-06,
1840
+ "loss": 0.0882,
1841
+ "step": 6350
1842
+ },
1843
+ {
1844
+ "epoch": 2.713920817369093,
1845
+ "grad_norm": 2.5452988147735596,
1846
+ "learning_rate": 2.325714285714286e-06,
1847
+ "loss": 0.081,
1848
+ "step": 6375
1849
+ },
1850
+ {
1851
+ "epoch": 2.724563644103874,
1852
+ "grad_norm": 1.9240838289260864,
1853
+ "learning_rate": 2.29e-06,
1854
+ "loss": 0.0672,
1855
+ "step": 6400
1856
+ },
1857
+ {
1858
+ "epoch": 2.735206470838655,
1859
+ "grad_norm": 2.3632349967956543,
1860
+ "learning_rate": 2.2542857142857146e-06,
1861
+ "loss": 0.0716,
1862
+ "step": 6425
1863
+ },
1864
+ {
1865
+ "epoch": 2.7458492975734354,
1866
+ "grad_norm": 1.9626713991165161,
1867
+ "learning_rate": 2.2185714285714287e-06,
1868
+ "loss": 0.0857,
1869
+ "step": 6450
1870
+ },
1871
+ {
1872
+ "epoch": 2.7564921243082163,
1873
+ "grad_norm": 1.8497956991195679,
1874
+ "learning_rate": 2.1828571428571428e-06,
1875
+ "loss": 0.0774,
1876
+ "step": 6475
1877
+ },
1878
+ {
1879
+ "epoch": 2.767134951042997,
1880
+ "grad_norm": 1.9737045764923096,
1881
+ "learning_rate": 2.1471428571428573e-06,
1882
+ "loss": 0.0884,
1883
+ "step": 6500
1884
+ },
1885
+ {
1886
+ "epoch": 2.7777777777777777,
1887
+ "grad_norm": 3.017702102661133,
1888
+ "learning_rate": 2.1114285714285714e-06,
1889
+ "loss": 0.0894,
1890
+ "step": 6525
1891
+ },
1892
+ {
1893
+ "epoch": 2.7884206045125586,
1894
+ "grad_norm": 2.41921067237854,
1895
+ "learning_rate": 2.075714285714286e-06,
1896
+ "loss": 0.0855,
1897
+ "step": 6550
1898
+ },
1899
+ {
1900
+ "epoch": 2.7990634312473395,
1901
+ "grad_norm": 2.0304954051971436,
1902
+ "learning_rate": 2.04e-06,
1903
+ "loss": 0.0802,
1904
+ "step": 6575
1905
+ },
1906
+ {
1907
+ "epoch": 2.80970625798212,
1908
+ "grad_norm": 2.724147319793701,
1909
+ "learning_rate": 2.0042857142857145e-06,
1910
+ "loss": 0.0892,
1911
+ "step": 6600
1912
+ },
1913
+ {
1914
+ "epoch": 2.820349084716901,
1915
+ "grad_norm": 1.7320371866226196,
1916
+ "learning_rate": 1.968571428571429e-06,
1917
+ "loss": 0.1036,
1918
+ "step": 6625
1919
+ },
1920
+ {
1921
+ "epoch": 2.8309919114516817,
1922
+ "grad_norm": 2.932657241821289,
1923
+ "learning_rate": 1.932857142857143e-06,
1924
+ "loss": 0.0902,
1925
+ "step": 6650
1926
+ },
1927
+ {
1928
+ "epoch": 2.841634738186462,
1929
+ "grad_norm": 2.653630256652832,
1930
+ "learning_rate": 1.8971428571428573e-06,
1931
+ "loss": 0.0807,
1932
+ "step": 6675
1933
+ },
1934
+ {
1935
+ "epoch": 2.852277564921243,
1936
+ "grad_norm": 2.851041078567505,
1937
+ "learning_rate": 1.8614285714285714e-06,
1938
+ "loss": 0.0908,
1939
+ "step": 6700
1940
+ },
1941
+ {
1942
+ "epoch": 2.862920391656024,
1943
+ "grad_norm": 3.30446720123291,
1944
+ "learning_rate": 1.825714285714286e-06,
1945
+ "loss": 0.1001,
1946
+ "step": 6725
1947
+ },
1948
+ {
1949
+ "epoch": 2.873563218390805,
1950
+ "grad_norm": 3.250701427459717,
1951
+ "learning_rate": 1.79e-06,
1952
+ "loss": 0.0825,
1953
+ "step": 6750
1954
+ },
1955
+ {
1956
+ "epoch": 2.8842060451255853,
1957
+ "grad_norm": 2.4845850467681885,
1958
+ "learning_rate": 1.7542857142857145e-06,
1959
+ "loss": 0.0705,
1960
+ "step": 6775
1961
+ },
1962
+ {
1963
+ "epoch": 2.894848871860366,
1964
+ "grad_norm": 2.6934683322906494,
1965
+ "learning_rate": 1.7185714285714286e-06,
1966
+ "loss": 0.0667,
1967
+ "step": 6800
1968
+ },
1969
+ {
1970
+ "epoch": 2.9054916985951467,
1971
+ "grad_norm": 2.785459518432617,
1972
+ "learning_rate": 1.6828571428571431e-06,
1973
+ "loss": 0.0746,
1974
+ "step": 6825
1975
+ },
1976
+ {
1977
+ "epoch": 2.9161345253299276,
1978
+ "grad_norm": 2.5107369422912598,
1979
+ "learning_rate": 1.6471428571428572e-06,
1980
+ "loss": 0.077,
1981
+ "step": 6850
1982
+ },
1983
+ {
1984
+ "epoch": 2.9267773520647085,
1985
+ "grad_norm": 3.4977328777313232,
1986
+ "learning_rate": 1.6114285714285715e-06,
1987
+ "loss": 0.0857,
1988
+ "step": 6875
1989
+ },
1990
+ {
1991
+ "epoch": 2.9374201787994894,
1992
+ "grad_norm": 2.6151537895202637,
1993
+ "learning_rate": 1.575714285714286e-06,
1994
+ "loss": 0.0729,
1995
+ "step": 6900
1996
+ },
1997
+ {
1998
+ "epoch": 2.94806300553427,
1999
+ "grad_norm": 2.975446939468384,
2000
+ "learning_rate": 1.54e-06,
2001
+ "loss": 0.0581,
2002
+ "step": 6925
2003
+ },
2004
+ {
2005
+ "epoch": 2.9587058322690507,
2006
+ "grad_norm": 2.03027606010437,
2007
+ "learning_rate": 1.5042857142857146e-06,
2008
+ "loss": 0.1017,
2009
+ "step": 6950
2010
+ },
2011
+ {
2012
+ "epoch": 2.969348659003831,
2013
+ "grad_norm": 3.547647476196289,
2014
+ "learning_rate": 1.4685714285714287e-06,
2015
+ "loss": 0.0603,
2016
+ "step": 6975
2017
+ },
2018
+ {
2019
+ "epoch": 2.979991485738612,
2020
+ "grad_norm": 1.8231449127197266,
2021
+ "learning_rate": 1.432857142857143e-06,
2022
+ "loss": 0.0738,
2023
+ "step": 7000
2024
+ },
2025
+ {
2026
+ "epoch": 2.979991485738612,
2027
+ "eval_loss": 0.11986401677131653,
2028
+ "eval_runtime": 2460.4728,
2029
+ "eval_samples_per_second": 1.941,
2030
+ "eval_steps_per_second": 0.061,
2031
+ "eval_wer": 15.561025938059986,
2032
+ "step": 7000
2033
+ },
2034
+ {
2035
+ "epoch": 2.990634312473393,
2036
+ "grad_norm": 2.170557737350464,
2037
+ "learning_rate": 1.3971428571428573e-06,
2038
+ "loss": 0.0847,
2039
+ "step": 7025
2040
+ },
2041
+ {
2042
+ "epoch": 3.001277139208174,
2043
+ "grad_norm": 1.225490689277649,
2044
+ "learning_rate": 1.3614285714285716e-06,
2045
+ "loss": 0.0737,
2046
+ "step": 7050
2047
+ },
2048
+ {
2049
+ "epoch": 3.0119199659429543,
2050
+ "grad_norm": 2.1241679191589355,
2051
+ "learning_rate": 1.3257142857142856e-06,
2052
+ "loss": 0.0595,
2053
+ "step": 7075
2054
+ },
2055
+ {
2056
+ "epoch": 3.022562792677735,
2057
+ "grad_norm": 2.3180058002471924,
2058
+ "learning_rate": 1.2900000000000001e-06,
2059
+ "loss": 0.0615,
2060
+ "step": 7100
2061
+ },
2062
+ {
2063
+ "epoch": 3.033205619412516,
2064
+ "grad_norm": 2.4434351921081543,
2065
+ "learning_rate": 1.2542857142857142e-06,
2066
+ "loss": 0.0536,
2067
+ "step": 7125
2068
+ },
2069
+ {
2070
+ "epoch": 3.0438484461472965,
2071
+ "grad_norm": 2.712207317352295,
2072
+ "learning_rate": 1.2185714285714287e-06,
2073
+ "loss": 0.0558,
2074
+ "step": 7150
2075
+ },
2076
+ {
2077
+ "epoch": 3.0544912728820774,
2078
+ "grad_norm": 2.7258520126342773,
2079
+ "learning_rate": 1.182857142857143e-06,
2080
+ "loss": 0.0727,
2081
+ "step": 7175
2082
+ },
2083
+ {
2084
+ "epoch": 3.0651340996168583,
2085
+ "grad_norm": 2.103072166442871,
2086
+ "learning_rate": 1.1471428571428573e-06,
2087
+ "loss": 0.0479,
2088
+ "step": 7200
2089
+ },
2090
+ {
2091
+ "epoch": 3.075776926351639,
2092
+ "grad_norm": 1.9003605842590332,
2093
+ "learning_rate": 1.1114285714285714e-06,
2094
+ "loss": 0.0554,
2095
+ "step": 7225
2096
+ },
2097
+ {
2098
+ "epoch": 3.0864197530864197,
2099
+ "grad_norm": 1.4967641830444336,
2100
+ "learning_rate": 1.0757142857142857e-06,
2101
+ "loss": 0.0409,
2102
+ "step": 7250
2103
+ },
2104
+ {
2105
+ "epoch": 3.0970625798212006,
2106
+ "grad_norm": 1.389493703842163,
2107
+ "learning_rate": 1.04e-06,
2108
+ "loss": 0.0474,
2109
+ "step": 7275
2110
+ },
2111
+ {
2112
+ "epoch": 3.107705406555981,
2113
+ "grad_norm": 1.4253233671188354,
2114
+ "learning_rate": 1.0042857142857143e-06,
2115
+ "loss": 0.0445,
2116
+ "step": 7300
2117
+ },
2118
+ {
2119
+ "epoch": 3.118348233290762,
2120
+ "grad_norm": 2.6737582683563232,
2121
+ "learning_rate": 9.685714285714288e-07,
2122
+ "loss": 0.0584,
2123
+ "step": 7325
2124
+ },
2125
+ {
2126
+ "epoch": 3.128991060025543,
2127
+ "grad_norm": 2.5511069297790527,
2128
+ "learning_rate": 9.32857142857143e-07,
2129
+ "loss": 0.0557,
2130
+ "step": 7350
2131
+ },
2132
+ {
2133
+ "epoch": 3.1396338867603237,
2134
+ "grad_norm": 2.139846086502075,
2135
+ "learning_rate": 8.971428571428573e-07,
2136
+ "loss": 0.0467,
2137
+ "step": 7375
2138
+ },
2139
+ {
2140
+ "epoch": 3.150276713495104,
2141
+ "grad_norm": 1.826206088066101,
2142
+ "learning_rate": 8.614285714285716e-07,
2143
+ "loss": 0.054,
2144
+ "step": 7400
2145
+ },
2146
+ {
2147
+ "epoch": 3.160919540229885,
2148
+ "grad_norm": 2.8576643466949463,
2149
+ "learning_rate": 8.257142857142858e-07,
2150
+ "loss": 0.0517,
2151
+ "step": 7425
2152
+ },
2153
+ {
2154
+ "epoch": 3.171562366964666,
2155
+ "grad_norm": 2.1208717823028564,
2156
+ "learning_rate": 7.900000000000001e-07,
2157
+ "loss": 0.0667,
2158
+ "step": 7450
2159
+ },
2160
+ {
2161
+ "epoch": 3.1822051936994464,
2162
+ "grad_norm": 1.534239649772644,
2163
+ "learning_rate": 7.542857142857144e-07,
2164
+ "loss": 0.0592,
2165
+ "step": 7475
2166
+ },
2167
+ {
2168
+ "epoch": 3.1928480204342273,
2169
+ "grad_norm": 2.3605740070343018,
2170
+ "learning_rate": 7.185714285714286e-07,
2171
+ "loss": 0.063,
2172
+ "step": 7500
2173
+ },
2174
+ {
2175
+ "epoch": 3.2034908471690082,
2176
+ "grad_norm": 2.1266493797302246,
2177
+ "learning_rate": 6.842857142857143e-07,
2178
+ "loss": 0.0567,
2179
+ "step": 7525
2180
+ },
2181
+ {
2182
+ "epoch": 3.2141336739037887,
2183
+ "grad_norm": 1.5303648710250854,
2184
+ "learning_rate": 6.485714285714287e-07,
2185
+ "loss": 0.0619,
2186
+ "step": 7550
2187
+ },
2188
+ {
2189
+ "epoch": 3.2247765006385696,
2190
+ "grad_norm": 2.740006446838379,
2191
+ "learning_rate": 6.128571428571429e-07,
2192
+ "loss": 0.0773,
2193
+ "step": 7575
2194
+ },
2195
+ {
2196
+ "epoch": 3.2354193273733505,
2197
+ "grad_norm": 1.5786134004592896,
2198
+ "learning_rate": 5.771428571428572e-07,
2199
+ "loss": 0.0629,
2200
+ "step": 7600
2201
+ },
2202
+ {
2203
+ "epoch": 3.246062154108131,
2204
+ "grad_norm": 1.3754280805587769,
2205
+ "learning_rate": 5.414285714285715e-07,
2206
+ "loss": 0.0614,
2207
+ "step": 7625
2208
+ },
2209
+ {
2210
+ "epoch": 3.256704980842912,
2211
+ "grad_norm": 0.8814867734909058,
2212
+ "learning_rate": 5.057142857142858e-07,
2213
+ "loss": 0.0574,
2214
+ "step": 7650
2215
+ },
2216
+ {
2217
+ "epoch": 3.2673478075776927,
2218
+ "grad_norm": 2.909646511077881,
2219
+ "learning_rate": 4.7000000000000005e-07,
2220
+ "loss": 0.0408,
2221
+ "step": 7675
2222
+ },
2223
+ {
2224
+ "epoch": 3.2779906343124736,
2225
+ "grad_norm": 2.272367238998413,
2226
+ "learning_rate": 4.342857142857143e-07,
2227
+ "loss": 0.0539,
2228
+ "step": 7700
2229
+ },
2230
+ {
2231
+ "epoch": 3.288633461047254,
2232
+ "grad_norm": 2.039271831512451,
2233
+ "learning_rate": 3.985714285714286e-07,
2234
+ "loss": 0.0688,
2235
+ "step": 7725
2236
+ },
2237
+ {
2238
+ "epoch": 3.299276287782035,
2239
+ "grad_norm": 2.0516164302825928,
2240
+ "learning_rate": 3.6285714285714283e-07,
2241
+ "loss": 0.0546,
2242
+ "step": 7750
2243
+ },
2244
+ {
2245
+ "epoch": 3.3099191145168154,
2246
+ "grad_norm": 1.9131453037261963,
2247
+ "learning_rate": 3.271428571428572e-07,
2248
+ "loss": 0.0532,
2249
+ "step": 7775
2250
+ },
2251
+ {
2252
+ "epoch": 3.3205619412515963,
2253
+ "grad_norm": 1.66374933719635,
2254
+ "learning_rate": 2.914285714285715e-07,
2255
+ "loss": 0.0536,
2256
+ "step": 7800
2257
+ },
2258
+ {
2259
+ "epoch": 3.331204767986377,
2260
+ "grad_norm": 1.596907615661621,
2261
+ "learning_rate": 2.557142857142857e-07,
2262
+ "loss": 0.0456,
2263
+ "step": 7825
2264
+ },
2265
+ {
2266
+ "epoch": 3.341847594721158,
2267
+ "grad_norm": 2.430992603302002,
2268
+ "learning_rate": 2.2e-07,
2269
+ "loss": 0.0635,
2270
+ "step": 7850
2271
+ },
2272
+ {
2273
+ "epoch": 3.3524904214559386,
2274
+ "grad_norm": 2.4150683879852295,
2275
+ "learning_rate": 1.842857142857143e-07,
2276
+ "loss": 0.0638,
2277
+ "step": 7875
2278
+ },
2279
+ {
2280
+ "epoch": 3.3631332481907195,
2281
+ "grad_norm": 1.5517698526382446,
2282
+ "learning_rate": 1.4857142857142857e-07,
2283
+ "loss": 0.0615,
2284
+ "step": 7900
2285
+ },
2286
+ {
2287
+ "epoch": 3.3737760749255004,
2288
+ "grad_norm": 1.8786826133728027,
2289
+ "learning_rate": 1.1285714285714287e-07,
2290
+ "loss": 0.0607,
2291
+ "step": 7925
2292
+ },
2293
+ {
2294
+ "epoch": 3.384418901660281,
2295
+ "grad_norm": 2.849170446395874,
2296
+ "learning_rate": 7.714285714285715e-08,
2297
+ "loss": 0.0567,
2298
+ "step": 7950
2299
+ },
2300
+ {
2301
+ "epoch": 3.3950617283950617,
2302
+ "grad_norm": 2.8803513050079346,
2303
+ "learning_rate": 4.1428571428571426e-08,
2304
+ "loss": 0.0507,
2305
+ "step": 7975
2306
+ },
2307
+ {
2308
+ "epoch": 3.4057045551298426,
2309
+ "grad_norm": 2.7549145221710205,
2310
+ "learning_rate": 5.714285714285715e-09,
2311
+ "loss": 0.0544,
2312
+ "step": 8000
2313
+ },
2314
+ {
2315
+ "epoch": 3.4057045551298426,
2316
+ "eval_loss": 0.1155887097120285,
2317
+ "eval_runtime": 2444.7471,
2318
+ "eval_samples_per_second": 1.954,
2319
+ "eval_steps_per_second": 0.061,
2320
+ "eval_wer": 15.246076710047603,
2321
+ "step": 8000
2322
+ }
2323
+ ],
2324
+ "logging_steps": 25,
2325
+ "max_steps": 8000,
2326
+ "num_input_tokens_seen": 0,
2327
+ "num_train_epochs": 4,
2328
+ "save_steps": 1000,
2329
+ "stateful_callbacks": {
2330
+ "TrainerControl": {
2331
+ "args": {
2332
+ "should_epoch_stop": false,
2333
+ "should_evaluate": false,
2334
+ "should_log": false,
2335
+ "should_save": true,
2336
+ "should_training_stop": true
2337
+ },
2338
+ "attributes": {}
2339
+ }
2340
+ },
2341
+ "total_flos": 4.364112316878029e+20,
2342
+ "train_batch_size": 32,
2343
+ "trial_name": null,
2344
+ "trial_params": null
2345
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df4c3f6cdbdc7d71c42768190b989231c9ffbea37fb13bfc17d552f5f4c2732d
3
+ size 5368