MarkelFe commited on
Commit
69d7590
1 Parent(s): a742bd0

Training in progress, step 330000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2757a06bce48fa87bf9e9478c26cfbf215c50d2f68265f3a5f1369e2325144f9
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3af69497cd7909d18e142fdb28af685d5fd4b6d1152a10069e6f9547b752a3d5
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b33bd9e5a65135e04092c64ff67287c6b50f9ca7261884c1aeef47ab5ae22ab5
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:926d62aa8c0e8eb933d167c471771b77e84d87c1ca180d71a4153e846dcaa5bc
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb25646b1703a274e33c829bedb7366b8ef3f96c2bb36d8789415b3f642f877
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dac0a7c6aeb1853f8f712f2c9f2553cbe98523717c3c148ee76debf888894532
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.576475044985321,
5
- "global_step": 320000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4108,11 +4108,139 @@
4108
  "eval_samples_per_second": 166.315,
4109
  "eval_steps_per_second": 20.794,
4110
  "step": 320000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4111
  }
4112
  ],
4113
  "max_steps": 633540,
4114
  "num_train_epochs": 15,
4115
- "total_flos": 6.760424309184e+16,
4116
  "trial_name": null,
4117
  "trial_params": null
4118
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.813239890141112,
5
+ "global_step": 330000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4108
  "eval_samples_per_second": 166.315,
4109
  "eval_steps_per_second": 20.794,
4110
  "step": 320000
4111
+ },
4112
+ {
4113
+ "epoch": 7.59,
4114
+ "learning_rate": 0.0,
4115
+ "loss": 2.4285,
4116
+ "step": 320500
4117
+ },
4118
+ {
4119
+ "epoch": 7.6,
4120
+ "learning_rate": 0.0,
4121
+ "loss": 2.4267,
4122
+ "step": 321000
4123
+ },
4124
+ {
4125
+ "epoch": 7.61,
4126
+ "learning_rate": 0.0,
4127
+ "loss": 2.4389,
4128
+ "step": 321500
4129
+ },
4130
+ {
4131
+ "epoch": 7.62,
4132
+ "learning_rate": 0.0,
4133
+ "loss": 2.4277,
4134
+ "step": 322000
4135
+ },
4136
+ {
4137
+ "epoch": 7.64,
4138
+ "learning_rate": 0.0,
4139
+ "loss": 2.4349,
4140
+ "step": 322500
4141
+ },
4142
+ {
4143
+ "epoch": 7.65,
4144
+ "learning_rate": 0.0,
4145
+ "loss": 2.4376,
4146
+ "step": 323000
4147
+ },
4148
+ {
4149
+ "epoch": 7.66,
4150
+ "learning_rate": 0.0,
4151
+ "loss": 2.4453,
4152
+ "step": 323500
4153
+ },
4154
+ {
4155
+ "epoch": 7.67,
4156
+ "learning_rate": 0.0,
4157
+ "loss": 2.4331,
4158
+ "step": 324000
4159
+ },
4160
+ {
4161
+ "epoch": 7.68,
4162
+ "learning_rate": 0.0,
4163
+ "loss": 2.4418,
4164
+ "step": 324500
4165
+ },
4166
+ {
4167
+ "epoch": 7.69,
4168
+ "learning_rate": 0.0,
4169
+ "loss": 2.4342,
4170
+ "step": 325000
4171
+ },
4172
+ {
4173
+ "epoch": 7.71,
4174
+ "learning_rate": 0.0,
4175
+ "loss": 2.4216,
4176
+ "step": 325500
4177
+ },
4178
+ {
4179
+ "epoch": 7.72,
4180
+ "learning_rate": 0.0,
4181
+ "loss": 2.4335,
4182
+ "step": 326000
4183
+ },
4184
+ {
4185
+ "epoch": 7.73,
4186
+ "learning_rate": 0.0,
4187
+ "loss": 2.4472,
4188
+ "step": 326500
4189
+ },
4190
+ {
4191
+ "epoch": 7.74,
4192
+ "learning_rate": 0.0,
4193
+ "loss": 2.4415,
4194
+ "step": 327000
4195
+ },
4196
+ {
4197
+ "epoch": 7.75,
4198
+ "learning_rate": 0.0,
4199
+ "loss": 2.4462,
4200
+ "step": 327500
4201
+ },
4202
+ {
4203
+ "epoch": 7.77,
4204
+ "learning_rate": 0.0,
4205
+ "loss": 2.4519,
4206
+ "step": 328000
4207
+ },
4208
+ {
4209
+ "epoch": 7.78,
4210
+ "learning_rate": 0.0,
4211
+ "loss": 2.3932,
4212
+ "step": 328500
4213
+ },
4214
+ {
4215
+ "epoch": 7.79,
4216
+ "learning_rate": 0.0,
4217
+ "loss": 2.425,
4218
+ "step": 329000
4219
+ },
4220
+ {
4221
+ "epoch": 7.8,
4222
+ "learning_rate": 0.0,
4223
+ "loss": 2.4297,
4224
+ "step": 329500
4225
+ },
4226
+ {
4227
+ "epoch": 7.81,
4228
+ "learning_rate": 0.0,
4229
+ "loss": 2.4363,
4230
+ "step": 330000
4231
+ },
4232
+ {
4233
+ "epoch": 7.81,
4234
+ "eval_loss": 3.1522228717803955,
4235
+ "eval_runtime": 112.8083,
4236
+ "eval_samples_per_second": 166.406,
4237
+ "eval_steps_per_second": 20.805,
4238
+ "step": 330000
4239
  }
4240
  ],
4241
  "max_steps": 633540,
4242
  "num_train_epochs": 15,
4243
+ "total_flos": 6.971360467392e+16,
4244
  "trial_name": null,
4245
  "trial_params": null
4246
  }