boumehdi commited on
Commit
cdbe678
1 Parent(s): b452d82

Upload 12 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +2 -2
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +46 -166
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5ea1944603fdc43786885c6fad297352a4cfa88903c6e5e401a61afdb1ff69
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da38f3bc2fe6927f77f1e949dacaaa5c88d1ff1e7b5b630565b9bad55cdc8037
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8413826d343ca1a5a05286342dc463d0c96f14f6f5250d6663157cd0a22bfa14
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e93138c7536f15d081a2517332243e990720ea7e4c526912b3a5cff073cf49
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4b28524c43613725eb2734e93e3c395b0e0263834fec5ee89fe1a89e4e55726
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:061193452a858228d1c66af4f1074191f8ac9e88dfc0f2e16c6ccd6079a33e18
3
+ size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd0d7640fa100af1c436a6097e415e0c78c222c34fc3a4163201c7f7420d7659
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7076cb3d5134abd5db45f65ae43a71e92139309531dd802207a13ea550dea80b
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc7a186be3ea4a6d0305e7ea5e53d52b11c001b50c8a64d1a6c67ede89211232
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c024168d11f393cebc0d70778a05dbba26ed981c876446f516b34a4b4b7f2cc
3
  size 627
trainer_state.json CHANGED
@@ -1,223 +1,103 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.752260397830018,
5
- "global_step": 2450,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.72,
12
- "learning_rate": 1e-05,
13
- "loss": 0.0194,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 1.45,
18
- "learning_rate": 9.99927530980506e-06,
19
- "loss": 0.0174,
20
- "step": 200
21
  },
22
  {
23
  "epoch": 2.17,
24
  "learning_rate": 9.998550619610118e-06,
25
- "loss": 0.0197,
26
  "step": 300
27
  },
28
  {
29
- "epoch": 2.54,
30
- "eval_loss": 0.2245764136314392,
31
- "eval_runtime": 322.1745,
32
- "eval_samples_per_second": 12.251,
33
- "eval_steps_per_second": 1.533,
34
- "eval_wer": 0.17368103101087395,
35
- "step": 350
36
- },
37
- {
38
- "epoch": 2.9,
39
- "learning_rate": 9.997825929415176e-06,
40
- "loss": 0.0205,
41
- "step": 400
42
  },
43
  {
44
  "epoch": 3.62,
45
- "learning_rate": 9.997108486122183e-06,
46
- "loss": 0.0199,
 
 
 
47
  "step": 500
48
  },
49
  {
50
  "epoch": 4.35,
51
- "learning_rate": 9.996383795927241e-06,
52
- "loss": 0.0198,
53
  "step": 600
54
  },
55
  {
56
- "epoch": 5.07,
57
- "learning_rate": 9.9956591057323e-06,
58
- "loss": 0.0193,
59
- "step": 700
60
- },
61
- {
62
- "epoch": 5.07,
63
- "eval_loss": 0.23122623562812805,
64
- "eval_runtime": 209.3421,
65
- "eval_samples_per_second": 18.854,
66
- "eval_steps_per_second": 2.36,
67
- "eval_wer": 0.17217076117599678,
68
- "step": 700
69
- },
70
- {
71
- "epoch": 5.8,
72
- "learning_rate": 9.994934415537358e-06,
73
- "loss": 0.0186,
74
- "step": 800
75
  },
76
  {
77
  "epoch": 6.52,
78
- "learning_rate": 9.994209725342417e-06,
79
- "loss": 0.0194,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 7.25,
84
- "learning_rate": 9.993485035147475e-06,
85
- "loss": 0.0186,
 
 
 
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 7.61,
90
- "eval_loss": 0.2398330122232437,
91
- "eval_runtime": 209.0,
92
- "eval_samples_per_second": 18.885,
93
- "eval_steps_per_second": 2.364,
94
- "eval_wer": 0.17232178815948448,
95
  "step": 1050
96
  },
97
- {
98
- "epoch": 7.97,
99
- "learning_rate": 9.992760344952534e-06,
100
- "loss": 0.0186,
101
- "step": 1100
102
- },
103
  {
104
  "epoch": 8.69,
105
- "learning_rate": 9.99203565475759e-06,
106
- "loss": 0.0191,
107
  "step": 1200
108
  },
109
  {
110
- "epoch": 9.42,
111
- "learning_rate": 9.991310964562651e-06,
112
- "loss": 0.0185,
113
- "step": 1300
114
- },
115
- {
116
- "epoch": 10.14,
117
- "learning_rate": 9.990586274367708e-06,
118
- "loss": 0.0171,
119
- "step": 1400
120
- },
121
- {
122
- "epoch": 10.14,
123
- "eval_loss": 0.24630184471607208,
124
- "eval_runtime": 212.2032,
125
- "eval_samples_per_second": 18.6,
126
- "eval_steps_per_second": 2.328,
127
- "eval_wer": 0.17020741039065646,
128
- "step": 1400
129
  },
130
  {
131
  "epoch": 10.87,
132
  "learning_rate": 9.989861584172766e-06,
133
- "loss": 0.0176,
134
  "step": 1500
135
  },
136
  {
137
- "epoch": 11.59,
138
- "learning_rate": 9.989136893977825e-06,
139
- "loss": 0.018,
140
- "step": 1600
141
- },
142
- {
143
- "epoch": 12.32,
144
- "learning_rate": 9.988412203782883e-06,
145
- "loss": 0.0172,
146
- "step": 1700
147
- },
148
- {
149
- "epoch": 12.68,
150
- "eval_loss": 0.24790118634700775,
151
- "eval_runtime": 215.9062,
152
- "eval_samples_per_second": 18.281,
153
- "eval_steps_per_second": 2.288,
154
- "eval_wer": 0.17081151832460734,
155
- "step": 1750
156
- },
157
- {
158
- "epoch": 13.04,
159
- "learning_rate": 9.987687513587942e-06,
160
- "loss": 0.0172,
161
- "step": 1800
162
- },
163
- {
164
- "epoch": 13.77,
165
- "learning_rate": 9.986962823393e-06,
166
- "loss": 0.0165,
167
- "step": 1900
168
- },
169
- {
170
- "epoch": 14.49,
171
- "learning_rate": 9.986238133198059e-06,
172
- "loss": 0.0176,
173
- "step": 2000
174
- },
175
- {
176
- "epoch": 15.22,
177
- "learning_rate": 9.985513443003117e-06,
178
- "loss": 0.0173,
179
- "step": 2100
180
- },
181
- {
182
- "epoch": 15.22,
183
- "eval_loss": 0.24780623614788055,
184
- "eval_runtime": 215.4375,
185
- "eval_samples_per_second": 18.321,
186
- "eval_steps_per_second": 2.293,
187
- "eval_wer": 0.17015706806282724,
188
- "step": 2100
189
- },
190
- {
191
- "epoch": 15.94,
192
- "learning_rate": 9.984788752808176e-06,
193
- "loss": 0.0168,
194
- "step": 2200
195
- },
196
- {
197
- "epoch": 16.67,
198
- "learning_rate": 9.984064062613234e-06,
199
- "loss": 0.0165,
200
- "step": 2300
201
- },
202
- {
203
- "epoch": 17.39,
204
- "learning_rate": 9.983339372418293e-06,
205
- "loss": 0.0169,
206
- "step": 2400
207
- },
208
- {
209
- "epoch": 17.75,
210
- "eval_loss": 0.24955050647258759,
211
- "eval_runtime": 224.6408,
212
- "eval_samples_per_second": 17.57,
213
- "eval_steps_per_second": 2.199,
214
- "eval_wer": 0.1689488521949255,
215
- "step": 2450
216
  }
217
  ],
218
  "max_steps": 1380000,
219
  "num_train_epochs": 10000,
220
- "total_flos": 5.57531310441053e+19,
221
  "trial_name": null,
222
  "trial_params": null
223
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.867992766726944,
5
+ "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.09,
12
+ "learning_rate": 9.99963765490253e-06,
13
+ "loss": 0.0157,
14
+ "step": 150
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 2.17,
18
  "learning_rate": 9.998550619610118e-06,
19
+ "loss": 0.0123,
20
  "step": 300
21
  },
22
  {
23
+ "epoch": 3.26,
24
+ "learning_rate": 9.997463584317706e-06,
25
+ "loss": 0.0152,
26
+ "step": 450
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 3.62,
30
+ "eval_loss": 0.23832739889621735,
31
+ "eval_runtime": 297.425,
32
+ "eval_samples_per_second": 13.271,
33
+ "eval_steps_per_second": 1.661,
34
+ "eval_wer": 0.17043338199023506,
35
  "step": 500
36
  },
37
  {
38
  "epoch": 4.35,
39
+ "learning_rate": 9.996376549025293e-06,
40
+ "loss": 0.0167,
41
  "step": 600
42
  },
43
  {
44
+ "epoch": 5.43,
45
+ "learning_rate": 9.99528951373288e-06,
46
+ "loss": 0.0148,
47
+ "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 6.52,
51
+ "learning_rate": 9.994202478440467e-06,
52
+ "loss": 0.0153,
53
  "step": 900
54
  },
55
  {
56
  "epoch": 7.25,
57
+ "eval_loss": 0.24725446105003357,
58
+ "eval_runtime": 201.8594,
59
+ "eval_samples_per_second": 19.553,
60
+ "eval_steps_per_second": 2.447,
61
+ "eval_wer": 0.16867166658277546,
62
  "step": 1000
63
  },
64
  {
65
  "epoch": 7.61,
66
+ "learning_rate": 9.993115443148055e-06,
67
+ "loss": 0.0163,
 
 
 
68
  "step": 1050
69
  },
 
 
 
 
 
 
70
  {
71
  "epoch": 8.69,
72
+ "learning_rate": 9.992028407855643e-06,
73
+ "loss": 0.0158,
74
  "step": 1200
75
  },
76
  {
77
+ "epoch": 9.78,
78
+ "learning_rate": 9.99094861946518e-06,
79
+ "loss": 0.0146,
80
+ "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
  {
83
  "epoch": 10.87,
84
  "learning_rate": 9.989861584172766e-06,
85
+ "loss": 0.0154,
86
  "step": 1500
87
  },
88
  {
89
+ "epoch": 10.87,
90
+ "eval_loss": 0.24820923805236816,
91
+ "eval_runtime": 211.4153,
92
+ "eval_samples_per_second": 18.669,
93
+ "eval_steps_per_second": 2.337,
94
+ "eval_wer": 0.1679669804197916,
95
+ "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "max_steps": 1380000,
99
  "num_train_epochs": 10000,
100
+ "total_flos": 3.4217202294237536e+19,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3393125c7f14a291727a873967ef481f803e54d70aa9f3fcdf615773d38c2b19
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a48147752c3a8083ebf5cdd854e3c3535a7d951355ce96dd9e8d52de7d94ab
3
  size 3323