Ezhilraj69999 committed on
Commit
5626e82
1 Parent(s): 6dd0894

End of training

Browse files
Files changed (5) hide show
  1. README.md +4 -3
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +305 -106
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base
5
  tags:
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,10 +17,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # wav2vec2-base-wakeword
18
 
19
- This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2259
22
- - Accuracy: 0.8689
23
 
24
  ## Model description
25
 
 
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base
5
  tags:
6
+ - audio-classification
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
17
 
18
  # wav2vec2-base-wakeword
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the superb dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.1988
23
+ - Accuracy: 0.8980
24
 
25
  ## Model description
26
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.916201117318436,
3
- "eval_accuracy": 0.7472527472527473,
4
- "eval_loss": 4.021728038787842,
5
- "eval_runtime": 21.9494,
6
- "eval_samples_per_second": 58.042,
7
- "eval_steps_per_second": 1.822,
8
- "total_flos": 2.59294349617536e+17,
9
- "train_loss": 5.575903250954368,
10
- "train_runtime": 352.7331,
11
- "train_samples_per_second": 81.067,
12
- "train_steps_per_second": 0.624
13
  }
 
1
  {
2
+ "epoch": 9.832402234636872,
3
+ "eval_accuracy": 0.8979591836734694,
4
+ "eval_loss": 0.1988428384065628,
5
+ "eval_runtime": 21.864,
6
+ "eval_samples_per_second": 58.269,
7
+ "eval_steps_per_second": 1.829,
8
+ "total_flos": 5.1057322104096e+17,
9
+ "train_loss": 0.21603115824135868,
10
+ "train_runtime": 746.0649,
11
+ "train_samples_per_second": 76.656,
12
+ "train_steps_per_second": 0.59
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.916201117318436,
3
- "eval_accuracy": 0.7472527472527473,
4
- "eval_loss": 4.021728038787842,
5
- "eval_runtime": 21.9494,
6
- "eval_samples_per_second": 58.042,
7
- "eval_steps_per_second": 1.822
8
  }
 
1
  {
2
+ "epoch": 9.832402234636872,
3
+ "eval_accuracy": 0.8979591836734694,
4
+ "eval_loss": 0.1988428384065628,
5
+ "eval_runtime": 21.864,
6
+ "eval_samples_per_second": 58.269,
7
+ "eval_steps_per_second": 1.829
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.916201117318436,
3
- "total_flos": 2.59294349617536e+17,
4
- "train_loss": 5.575903250954368,
5
- "train_runtime": 352.7331,
6
- "train_samples_per_second": 81.067,
7
- "train_steps_per_second": 0.624
8
  }
 
1
  {
2
+ "epoch": 9.832402234636872,
3
+ "total_flos": 5.1057322104096e+17,
4
+ "train_loss": 0.21603115824135868,
5
+ "train_runtime": 746.0649,
6
+ "train_samples_per_second": 76.656,
7
+ "train_steps_per_second": 0.59
8
  }
trainer_state.json CHANGED
@@ -1,226 +1,425 @@
1
  {
2
- "best_metric": 0.7472527472527473,
3
- "best_model_checkpoint": "wav2vec2-base-wakeword/checkpoint-179",
4
- "epoch": 4.916201117318436,
5
  "eval_steps": 500,
6
- "global_step": 220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.22346368715083798,
13
- "grad_norm": 1.7972055673599243,
14
- "learning_rate": 1.3636363636363637e-05,
15
- "loss": 8.6174,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.44692737430167595,
20
- "grad_norm": 3.6899046897888184,
21
- "learning_rate": 2.7272727272727273e-05,
22
- "loss": 8.3156,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.6703910614525139,
27
- "grad_norm": 4.899014949798584,
28
- "learning_rate": 2.8787878787878788e-05,
29
- "loss": 7.6827,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.8938547486033519,
34
- "grad_norm": 5.642251014709473,
35
  "learning_rate": 2.7272727272727273e-05,
36
- "loss": 7.269,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.9832402234636871,
41
- "eval_accuracy": 0.5274725274725275,
42
- "eval_loss": 6.90291690826416,
43
- "eval_runtime": 20.6201,
44
- "eval_samples_per_second": 61.784,
45
- "eval_steps_per_second": 1.94,
46
  "step": 44
47
  },
48
  {
49
  "epoch": 1.1173184357541899,
50
- "grad_norm": 6.348006248474121,
51
- "learning_rate": 2.575757575757576e-05,
52
- "loss": 6.9601,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.3407821229050279,
57
- "grad_norm": 6.931854248046875,
58
- "learning_rate": 2.4242424242424244e-05,
59
- "loss": 6.6727,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.564245810055866,
64
- "grad_norm": 7.51934814453125,
65
- "learning_rate": 2.272727272727273e-05,
66
- "loss": 6.3984,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.7877094972067038,
71
- "grad_norm": 8.076024055480957,
72
- "learning_rate": 2.121212121212121e-05,
73
- "loss": 6.1201,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.988826815642458,
78
- "eval_accuracy": 0.5572998430141287,
79
- "eval_loss": 5.679985046386719,
80
- "eval_runtime": 21.9537,
81
- "eval_samples_per_second": 58.031,
82
- "eval_steps_per_second": 1.822,
83
  "step": 89
84
  },
85
  {
86
  "epoch": 2.011173184357542,
87
- "grad_norm": 8.460003852844238,
88
- "learning_rate": 1.9696969696969697e-05,
89
- "loss": 5.8571,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.2346368715083798,
94
- "grad_norm": 8.846624374389648,
95
- "learning_rate": 1.8181818181818182e-05,
96
- "loss": 5.5999,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.458100558659218,
101
- "grad_norm": 9.555537223815918,
102
- "learning_rate": 1.6666666666666667e-05,
103
- "loss": 5.3511,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.6815642458100557,
108
- "grad_norm": 9.668717384338379,
109
- "learning_rate": 1.5151515151515153e-05,
110
- "loss": 5.1268,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.905027932960894,
115
- "grad_norm": 9.954819679260254,
116
- "learning_rate": 1.3636363636363637e-05,
117
- "loss": 4.9094,
118
  "step": 130
119
  },
120
  {
121
  "epoch": 2.994413407821229,
122
- "eval_accuracy": 0.6797488226059655,
123
- "eval_loss": 4.662096977233887,
124
- "eval_runtime": 22.1355,
125
- "eval_samples_per_second": 57.555,
126
- "eval_steps_per_second": 1.807,
127
  "step": 134
128
  },
129
  {
130
  "epoch": 3.1284916201117317,
131
- "grad_norm": 10.29443073272705,
132
- "learning_rate": 1.2121212121212122e-05,
133
- "loss": 4.7082,
134
  "step": 140
135
  },
136
  {
137
  "epoch": 3.35195530726257,
138
- "grad_norm": 10.487744331359863,
139
- "learning_rate": 1.0606060606060606e-05,
140
- "loss": 4.5347,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.5754189944134076,
145
- "grad_norm": 10.81801700592041,
146
- "learning_rate": 9.090909090909091e-06,
147
- "loss": 4.3854,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 3.798882681564246,
152
- "grad_norm": 10.992441177368164,
153
- "learning_rate": 7.5757575757575764e-06,
154
- "loss": 4.2402,
155
  "step": 170
156
  },
157
  {
158
  "epoch": 4.0,
159
- "eval_accuracy": 0.7472527472527473,
160
- "eval_loss": 4.021728038787842,
161
- "eval_runtime": 21.4694,
162
- "eval_samples_per_second": 59.34,
163
- "eval_steps_per_second": 1.863,
164
  "step": 179
165
  },
166
  {
167
  "epoch": 4.022346368715084,
168
- "grad_norm": 11.058788299560547,
169
- "learning_rate": 6.060606060606061e-06,
170
- "loss": 4.1213,
171
  "step": 180
172
  },
173
  {
174
  "epoch": 4.245810055865922,
175
- "grad_norm": 11.229594230651855,
176
- "learning_rate": 4.5454545454545455e-06,
177
- "loss": 4.0389,
178
  "step": 190
179
  },
180
  {
181
  "epoch": 4.4692737430167595,
182
- "grad_norm": 11.344269752502441,
183
- "learning_rate": 3.0303030303030305e-06,
184
- "loss": 3.9595,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.692737430167598,
189
- "grad_norm": 11.231822967529297,
190
- "learning_rate": 1.5151515151515152e-06,
191
- "loss": 3.9156,
192
  "step": 210
193
  },
194
  {
195
  "epoch": 4.916201117318436,
196
- "grad_norm": 11.280599594116211,
197
- "learning_rate": 0.0,
198
- "loss": 3.8857,
199
  "step": 220
200
  },
201
  {
202
- "epoch": 4.916201117318436,
203
- "eval_accuracy": 0.717425431711146,
204
- "eval_loss": 3.8197391033172607,
205
- "eval_runtime": 21.9944,
206
- "eval_samples_per_second": 57.924,
207
- "eval_steps_per_second": 1.819,
208
- "step": 220
209
  },
210
  {
211
- "epoch": 4.916201117318436,
212
- "step": 220,
213
- "total_flos": 2.59294349617536e+17,
214
- "train_loss": 5.575903250954368,
215
- "train_runtime": 352.7331,
216
- "train_samples_per_second": 81.067,
217
- "train_steps_per_second": 0.624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  }
219
  ],
220
  "logging_steps": 10,
221
- "max_steps": 220,
222
  "num_input_tokens_seen": 0,
223
- "num_train_epochs": 5,
224
  "save_steps": 500,
225
  "stateful_callbacks": {
226
  "TrainerControl": {
@@ -234,7 +433,7 @@
234
  "attributes": {}
235
  }
236
  },
237
- "total_flos": 2.59294349617536e+17,
238
  "train_batch_size": 32,
239
  "trial_name": null,
240
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8979591836734694,
3
+ "best_model_checkpoint": "wav2vec2-base-wakeword/checkpoint-358",
4
+ "epoch": 9.832402234636872,
5
  "eval_steps": 500,
6
+ "global_step": 440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.22346368715083798,
13
+ "grad_norm": 0.2407168596982956,
14
+ "learning_rate": 6.818181818181818e-06,
15
+ "loss": 0.691,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.44692737430167595,
20
+ "grad_norm": 0.2775614261627197,
21
+ "learning_rate": 1.3636363636363637e-05,
22
+ "loss": 0.6834,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.6703910614525139,
27
+ "grad_norm": 0.8038562536239624,
28
+ "learning_rate": 2.0454545454545454e-05,
29
+ "loss": 0.6587,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.8938547486033519,
34
+ "grad_norm": 1.2933474779129028,
35
  "learning_rate": 2.7272727272727273e-05,
36
+ "loss": 0.5835,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.9832402234636871,
41
+ "eval_accuracy": 0.8304552590266876,
42
+ "eval_loss": 0.4824756383895874,
43
+ "eval_runtime": 21.1053,
44
+ "eval_samples_per_second": 60.364,
45
+ "eval_steps_per_second": 1.895,
46
  "step": 44
47
  },
48
  {
49
  "epoch": 1.1173184357541899,
50
+ "grad_norm": 2.5961785316467285,
51
+ "learning_rate": 2.9545454545454545e-05,
52
+ "loss": 0.4205,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.3407821229050279,
57
+ "grad_norm": 3.4171087741851807,
58
+ "learning_rate": 2.8787878787878788e-05,
59
+ "loss": 0.3854,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.564245810055866,
64
+ "grad_norm": 1.979997158050537,
65
+ "learning_rate": 2.803030303030303e-05,
66
+ "loss": 0.3348,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.7877094972067038,
71
+ "grad_norm": 5.98250150680542,
72
+ "learning_rate": 2.7272727272727273e-05,
73
+ "loss": 0.3198,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.988826815642458,
78
+ "eval_accuracy": 0.8681318681318682,
79
+ "eval_loss": 0.32195624709129333,
80
+ "eval_runtime": 21.9041,
81
+ "eval_samples_per_second": 58.163,
82
+ "eval_steps_per_second": 1.826,
83
  "step": 89
84
  },
85
  {
86
  "epoch": 2.011173184357542,
87
+ "grad_norm": 3.1482908725738525,
88
+ "learning_rate": 2.6515151515151516e-05,
89
+ "loss": 0.2848,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.2346368715083798,
94
+ "grad_norm": 2.441495895385742,
95
+ "learning_rate": 2.575757575757576e-05,
96
+ "loss": 0.2551,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.458100558659218,
101
+ "grad_norm": 3.192270517349243,
102
+ "learning_rate": 2.5e-05,
103
+ "loss": 0.2147,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.6815642458100557,
108
+ "grad_norm": 2.66180419921875,
109
+ "learning_rate": 2.4242424242424244e-05,
110
+ "loss": 0.1969,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.905027932960894,
115
+ "grad_norm": 3.3350164890289307,
116
+ "learning_rate": 2.3484848484848487e-05,
117
+ "loss": 0.2074,
118
  "step": 130
119
  },
120
  {
121
  "epoch": 2.994413407821229,
122
+ "eval_accuracy": 0.8571428571428571,
123
+ "eval_loss": 0.2878796458244324,
124
+ "eval_runtime": 22.2774,
125
+ "eval_samples_per_second": 57.188,
126
+ "eval_steps_per_second": 1.796,
127
  "step": 134
128
  },
129
  {
130
  "epoch": 3.1284916201117317,
131
+ "grad_norm": 2.3112423419952393,
132
+ "learning_rate": 2.272727272727273e-05,
133
+ "loss": 0.1678,
134
  "step": 140
135
  },
136
  {
137
  "epoch": 3.35195530726257,
138
+ "grad_norm": 2.198840618133545,
139
+ "learning_rate": 2.1969696969696972e-05,
140
+ "loss": 0.1946,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.5754189944134076,
145
+ "grad_norm": 2.59767746925354,
146
+ "learning_rate": 2.121212121212121e-05,
147
+ "loss": 0.1839,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 3.798882681564246,
152
+ "grad_norm": 4.717933654785156,
153
+ "learning_rate": 2.0454545454545454e-05,
154
+ "loss": 0.164,
155
  "step": 170
156
  },
157
  {
158
  "epoch": 4.0,
159
+ "eval_accuracy": 0.8453689167974883,
160
+ "eval_loss": 0.28666409850120544,
161
+ "eval_runtime": 22.7696,
162
+ "eval_samples_per_second": 55.952,
163
+ "eval_steps_per_second": 1.757,
164
  "step": 179
165
  },
166
  {
167
  "epoch": 4.022346368715084,
168
+ "grad_norm": 1.9042879343032837,
169
+ "learning_rate": 1.9696969696969697e-05,
170
+ "loss": 0.173,
171
  "step": 180
172
  },
173
  {
174
  "epoch": 4.245810055865922,
175
+ "grad_norm": 1.9547667503356934,
176
+ "learning_rate": 1.893939393939394e-05,
177
+ "loss": 0.1579,
178
  "step": 190
179
  },
180
  {
181
  "epoch": 4.4692737430167595,
182
+ "grad_norm": 4.667260646820068,
183
+ "learning_rate": 1.8181818181818182e-05,
184
+ "loss": 0.1569,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.692737430167598,
189
+ "grad_norm": 2.756227970123291,
190
+ "learning_rate": 1.7424242424242425e-05,
191
+ "loss": 0.1499,
192
  "step": 210
193
  },
194
  {
195
  "epoch": 4.916201117318436,
196
+ "grad_norm": 1.5382293462753296,
197
+ "learning_rate": 1.6666666666666667e-05,
198
+ "loss": 0.1524,
199
  "step": 220
200
  },
201
  {
202
+ "epoch": 4.983240223463687,
203
+ "eval_accuracy": 0.8414442700156985,
204
+ "eval_loss": 0.2756529152393341,
205
+ "eval_runtime": 22.7791,
206
+ "eval_samples_per_second": 55.929,
207
+ "eval_steps_per_second": 1.756,
208
+ "step": 223
209
  },
210
  {
211
+ "epoch": 5.139664804469274,
212
+ "grad_norm": 1.9439266920089722,
213
+ "learning_rate": 1.590909090909091e-05,
214
+ "loss": 0.1429,
215
+ "step": 230
216
+ },
217
+ {
218
+ "epoch": 5.363128491620111,
219
+ "grad_norm": 3.896639585494995,
220
+ "learning_rate": 1.5151515151515153e-05,
221
+ "loss": 0.1378,
222
+ "step": 240
223
+ },
224
+ {
225
+ "epoch": 5.58659217877095,
226
+ "grad_norm": 2.2387351989746094,
227
+ "learning_rate": 1.4393939393939394e-05,
228
+ "loss": 0.1204,
229
+ "step": 250
230
+ },
231
+ {
232
+ "epoch": 5.810055865921788,
233
+ "grad_norm": 2.18900990486145,
234
+ "learning_rate": 1.3636363636363637e-05,
235
+ "loss": 0.1529,
236
+ "step": 260
237
+ },
238
+ {
239
+ "epoch": 5.988826815642458,
240
+ "eval_accuracy": 0.8273155416012559,
241
+ "eval_loss": 0.3233005106449127,
242
+ "eval_runtime": 22.9092,
243
+ "eval_samples_per_second": 55.611,
244
+ "eval_steps_per_second": 1.746,
245
+ "step": 268
246
+ },
247
+ {
248
+ "epoch": 6.033519553072626,
249
+ "grad_norm": 2.8840532302856445,
250
+ "learning_rate": 1.287878787878788e-05,
251
+ "loss": 0.1345,
252
+ "step": 270
253
+ },
254
+ {
255
+ "epoch": 6.256983240223463,
256
+ "grad_norm": 4.434890270233154,
257
+ "learning_rate": 1.2121212121212122e-05,
258
+ "loss": 0.1382,
259
+ "step": 280
260
+ },
261
+ {
262
+ "epoch": 6.4804469273743015,
263
+ "grad_norm": 0.9645891785621643,
264
+ "learning_rate": 1.1363636363636365e-05,
265
+ "loss": 0.1071,
266
+ "step": 290
267
+ },
268
+ {
269
+ "epoch": 6.70391061452514,
270
+ "grad_norm": 1.0303077697753906,
271
+ "learning_rate": 1.0606060606060606e-05,
272
+ "loss": 0.1383,
273
+ "step": 300
274
+ },
275
+ {
276
+ "epoch": 6.927374301675978,
277
+ "grad_norm": 1.8454982042312622,
278
+ "learning_rate": 9.848484848484848e-06,
279
+ "loss": 0.1256,
280
+ "step": 310
281
+ },
282
+ {
283
+ "epoch": 6.994413407821229,
284
+ "eval_accuracy": 0.8665620094191523,
285
+ "eval_loss": 0.21918237209320068,
286
+ "eval_runtime": 23.3426,
287
+ "eval_samples_per_second": 54.578,
288
+ "eval_steps_per_second": 1.714,
289
+ "step": 313
290
+ },
291
+ {
292
+ "epoch": 7.150837988826815,
293
+ "grad_norm": 2.300724506378174,
294
+ "learning_rate": 9.090909090909091e-06,
295
+ "loss": 0.1268,
296
+ "step": 320
297
+ },
298
+ {
299
+ "epoch": 7.374301675977653,
300
+ "grad_norm": 3.0894436836242676,
301
+ "learning_rate": 8.333333333333334e-06,
302
+ "loss": 0.1136,
303
+ "step": 330
304
+ },
305
+ {
306
+ "epoch": 7.597765363128492,
307
+ "grad_norm": 4.030120372772217,
308
+ "learning_rate": 7.5757575757575764e-06,
309
+ "loss": 0.1372,
310
+ "step": 340
311
+ },
312
+ {
313
+ "epoch": 7.82122905027933,
314
+ "grad_norm": 2.9448421001434326,
315
+ "learning_rate": 6.818181818181818e-06,
316
+ "loss": 0.1169,
317
+ "step": 350
318
+ },
319
+ {
320
+ "epoch": 8.0,
321
+ "eval_accuracy": 0.8979591836734694,
322
+ "eval_loss": 0.1988428384065628,
323
+ "eval_runtime": 22.9044,
324
+ "eval_samples_per_second": 55.623,
325
+ "eval_steps_per_second": 1.746,
326
+ "step": 358
327
+ },
328
+ {
329
+ "epoch": 8.044692737430168,
330
+ "grad_norm": 0.7518815994262695,
331
+ "learning_rate": 6.060606060606061e-06,
332
+ "loss": 0.1275,
333
+ "step": 360
334
+ },
335
+ {
336
+ "epoch": 8.268156424581006,
337
+ "grad_norm": 3.2900145053863525,
338
+ "learning_rate": 5.303030303030303e-06,
339
+ "loss": 0.1221,
340
+ "step": 370
341
+ },
342
+ {
343
+ "epoch": 8.491620111731844,
344
+ "grad_norm": 5.220028400421143,
345
+ "learning_rate": 4.5454545454545455e-06,
346
+ "loss": 0.1281,
347
+ "step": 380
348
+ },
349
+ {
350
+ "epoch": 8.71508379888268,
351
+ "grad_norm": 1.6203066110610962,
352
+ "learning_rate": 3.7878787878787882e-06,
353
+ "loss": 0.1163,
354
+ "step": 390
355
+ },
356
+ {
357
+ "epoch": 8.938547486033519,
358
+ "grad_norm": 2.0471959114074707,
359
+ "learning_rate": 3.0303030303030305e-06,
360
+ "loss": 0.1128,
361
+ "step": 400
362
+ },
363
+ {
364
+ "epoch": 8.983240223463687,
365
+ "eval_accuracy": 0.8712715855572999,
366
+ "eval_loss": 0.21884499490261078,
367
+ "eval_runtime": 22.9459,
368
+ "eval_samples_per_second": 55.522,
369
+ "eval_steps_per_second": 1.743,
370
+ "step": 402
371
+ },
372
+ {
373
+ "epoch": 9.162011173184357,
374
+ "grad_norm": 1.1530722379684448,
375
+ "learning_rate": 2.2727272727272728e-06,
376
+ "loss": 0.1086,
377
+ "step": 410
378
+ },
379
+ {
380
+ "epoch": 9.385474860335195,
381
+ "grad_norm": 2.0811774730682373,
382
+ "learning_rate": 1.5151515151515152e-06,
383
+ "loss": 0.1296,
384
+ "step": 420
385
+ },
386
+ {
387
+ "epoch": 9.608938547486034,
388
+ "grad_norm": 0.9542053937911987,
389
+ "learning_rate": 7.575757575757576e-07,
390
+ "loss": 0.1067,
391
+ "step": 430
392
+ },
393
+ {
394
+ "epoch": 9.832402234636872,
395
+ "grad_norm": 2.6447253227233887,
396
+ "learning_rate": 0.0,
397
+ "loss": 0.1252,
398
+ "step": 440
399
+ },
400
+ {
401
+ "epoch": 9.832402234636872,
402
+ "eval_accuracy": 0.868916797488226,
403
+ "eval_loss": 0.2259027510881424,
404
+ "eval_runtime": 23.8978,
405
+ "eval_samples_per_second": 53.31,
406
+ "eval_steps_per_second": 1.674,
407
+ "step": 440
408
+ },
409
+ {
410
+ "epoch": 9.832402234636872,
411
+ "step": 440,
412
+ "total_flos": 5.1057322104096e+17,
413
+ "train_loss": 0.21603115824135868,
414
+ "train_runtime": 746.0649,
415
+ "train_samples_per_second": 76.656,
416
+ "train_steps_per_second": 0.59
417
  }
418
  ],
419
  "logging_steps": 10,
420
+ "max_steps": 440,
421
  "num_input_tokens_seen": 0,
422
+ "num_train_epochs": 10,
423
  "save_steps": 500,
424
  "stateful_callbacks": {
425
  "TrainerControl": {
 
433
  "attributes": {}
434
  }
435
  },
436
+ "total_flos": 5.1057322104096e+17,
437
  "train_batch_size": 32,
438
  "trial_name": null,
439
  "trial_params": null