MarkelFe commited on
Commit
f6b6805
1 Parent(s): fe67d3f

Training in progress, step 410000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4fff2aeefd83972c56998a5a03a364dee23e67947e66df33d2bd966ca11368c
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c55164556dd25d6a582fef22bc2c651808a168730ec26b5c6008062fd0f7cc5d
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80caa7b1c7797261d4006e75832b7028b5806951118ab498cf6b90ae0034d29
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d13f3b70adc2f936ec547de1ec36f77495ca229446b2ed5ee40a8227e58a819b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92db83fcf7b59e32e0dc1343bd429b913ab1514aa1de584e30dfd9208d927736
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc51395fb5526d062f2906017676468fbf5f119fecfd463d76e3f9fc8a940a31
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.470593806231651,
5
- "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5132,11 +5132,139 @@
5132
  "eval_samples_per_second": 166.431,
5133
  "eval_steps_per_second": 20.808,
5134
  "step": 400000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5135
  }
5136
  ],
5137
  "max_steps": 633540,
5138
  "num_train_epochs": 15,
5139
- "total_flos": 8.444449158912e+16,
5140
  "trial_name": null,
5141
  "trial_params": null
5142
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.707358651387443,
5
+ "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5132
  "eval_samples_per_second": 166.431,
5133
  "eval_steps_per_second": 20.808,
5134
  "step": 400000
5135
+ },
5136
+ {
5137
+ "epoch": 9.48,
5138
+ "learning_rate": 0.0,
5139
+ "loss": 2.413,
5140
+ "step": 400500
5141
+ },
5142
+ {
5143
+ "epoch": 9.49,
5144
+ "learning_rate": 0.0,
5145
+ "loss": 2.4149,
5146
+ "step": 401000
5147
+ },
5148
+ {
5149
+ "epoch": 9.51,
5150
+ "learning_rate": 0.0,
5151
+ "loss": 2.4278,
5152
+ "step": 401500
5153
+ },
5154
+ {
5155
+ "epoch": 9.52,
5156
+ "learning_rate": 0.0,
5157
+ "loss": 2.4337,
5158
+ "step": 402000
5159
+ },
5160
+ {
5161
+ "epoch": 9.53,
5162
+ "learning_rate": 0.0,
5163
+ "loss": 2.4553,
5164
+ "step": 402500
5165
+ },
5166
+ {
5167
+ "epoch": 9.54,
5168
+ "learning_rate": 0.0,
5169
+ "loss": 2.4216,
5170
+ "step": 403000
5171
+ },
5172
+ {
5173
+ "epoch": 9.55,
5174
+ "learning_rate": 0.0,
5175
+ "loss": 2.4092,
5176
+ "step": 403500
5177
+ },
5178
+ {
5179
+ "epoch": 9.57,
5180
+ "learning_rate": 0.0,
5181
+ "loss": 2.4274,
5182
+ "step": 404000
5183
+ },
5184
+ {
5185
+ "epoch": 9.58,
5186
+ "learning_rate": 0.0,
5187
+ "loss": 2.426,
5188
+ "step": 404500
5189
+ },
5190
+ {
5191
+ "epoch": 9.59,
5192
+ "learning_rate": 0.0,
5193
+ "loss": 2.4348,
5194
+ "step": 405000
5195
+ },
5196
+ {
5197
+ "epoch": 9.6,
5198
+ "learning_rate": 0.0,
5199
+ "loss": 2.4308,
5200
+ "step": 405500
5201
+ },
5202
+ {
5203
+ "epoch": 9.61,
5204
+ "learning_rate": 0.0,
5205
+ "loss": 2.4369,
5206
+ "step": 406000
5207
+ },
5208
+ {
5209
+ "epoch": 9.62,
5210
+ "learning_rate": 0.0,
5211
+ "loss": 2.4375,
5212
+ "step": 406500
5213
+ },
5214
+ {
5215
+ "epoch": 9.64,
5216
+ "learning_rate": 0.0,
5217
+ "loss": 2.4243,
5218
+ "step": 407000
5219
+ },
5220
+ {
5221
+ "epoch": 9.65,
5222
+ "learning_rate": 0.0,
5223
+ "loss": 2.4209,
5224
+ "step": 407500
5225
+ },
5226
+ {
5227
+ "epoch": 9.66,
5228
+ "learning_rate": 0.0,
5229
+ "loss": 2.4399,
5230
+ "step": 408000
5231
+ },
5232
+ {
5233
+ "epoch": 9.67,
5234
+ "learning_rate": 0.0,
5235
+ "loss": 2.4302,
5236
+ "step": 408500
5237
+ },
5238
+ {
5239
+ "epoch": 9.68,
5240
+ "learning_rate": 0.0,
5241
+ "loss": 2.4611,
5242
+ "step": 409000
5243
+ },
5244
+ {
5245
+ "epoch": 9.7,
5246
+ "learning_rate": 0.0,
5247
+ "loss": 2.4166,
5248
+ "step": 409500
5249
+ },
5250
+ {
5251
+ "epoch": 9.71,
5252
+ "learning_rate": 0.0,
5253
+ "loss": 2.4281,
5254
+ "step": 410000
5255
+ },
5256
+ {
5257
+ "epoch": 9.71,
5258
+ "eval_loss": 3.1522228717803955,
5259
+ "eval_runtime": 112.8711,
5260
+ "eval_samples_per_second": 166.314,
5261
+ "eval_steps_per_second": 20.794,
5262
+ "step": 410000
5263
  }
5264
  ],
5265
  "max_steps": 633540,
5266
  "num_train_epochs": 15,
5267
+ "total_flos": 8.6548090457088e+16,
5268
  "trial_name": null,
5269
  "trial_params": null
5270
  }