MarkelFe commited on
Commit
888fae1
1 Parent(s): bd2f5fe

Training in progress, step 180000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57dee83a189d98be49bec5f74817fff8657797a917a6717ceea36d9c18ae6862
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58db17469cdf65f4aec47bb1e6dd34e7ef4e2c19f61ee30f01ab8aabbe5433b3
3
  size 995605445
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f42dbd9b35415ba34033be719191148beaa9e62794b2440aac238f33a11f2b47
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58842975ca799b06b4fa71b7c90228c8d715f5a818a53786f1086ad507f6d094
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0560bfc1efbba55157929955ba0396617b61df4e817669f59453ff7efb93f22b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59392ffa3e40bbac5c000c9a093cf9e7ad91abc71b1747bf198e56aee121e33f
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:178dabfccbd230719cf675b65dfa8a438486e62518bee396fdb95743a820c1ad
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd44998ac74ad04897dc0df2a86a524cb79649de88a84a40636d9419486a8937
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.025002367648452,
5
- "global_step": 170000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2188,11 +2188,139 @@
2188
  "eval_samples_per_second": 166.449,
2189
  "eval_steps_per_second": 20.811,
2190
  "step": 170000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2191
  }
2192
  ],
2193
  "max_steps": 633540,
2194
  "num_train_epochs": 15,
2195
- "total_flos": 3.6054596397312e+16,
2196
  "trial_name": null,
2197
  "trial_params": null
2198
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.2617672128042425,
5
+ "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2188
  "eval_samples_per_second": 166.449,
2189
  "eval_steps_per_second": 20.811,
2190
  "step": 170000
2191
+ },
2192
+ {
2193
+ "epoch": 4.04,
2194
+ "learning_rate": 1.760431859077564e-05,
2195
+ "loss": 2.6119,
2196
+ "step": 170500
2197
+ },
2198
+ {
2199
+ "epoch": 4.05,
2200
+ "learning_rate": 1.75411812987341e-05,
2201
+ "loss": 2.6106,
2202
+ "step": 171000
2203
+ },
2204
+ {
2205
+ "epoch": 4.06,
2206
+ "learning_rate": 1.7478044006692554e-05,
2207
+ "loss": 2.6353,
2208
+ "step": 171500
2209
+ },
2210
+ {
2211
+ "epoch": 4.07,
2212
+ "learning_rate": 1.741490671465101e-05,
2213
+ "loss": 2.6192,
2214
+ "step": 172000
2215
+ },
2216
+ {
2217
+ "epoch": 4.08,
2218
+ "learning_rate": 1.7351769422609465e-05,
2219
+ "loss": 2.6269,
2220
+ "step": 172500
2221
+ },
2222
+ {
2223
+ "epoch": 4.1,
2224
+ "learning_rate": 1.7288632130567923e-05,
2225
+ "loss": 2.6145,
2226
+ "step": 173000
2227
+ },
2228
+ {
2229
+ "epoch": 4.11,
2230
+ "learning_rate": 1.7225494838526378e-05,
2231
+ "loss": 2.6366,
2232
+ "step": 173500
2233
+ },
2234
+ {
2235
+ "epoch": 4.12,
2236
+ "learning_rate": 1.7162357546484833e-05,
2237
+ "loss": 2.6273,
2238
+ "step": 174000
2239
+ },
2240
+ {
2241
+ "epoch": 4.13,
2242
+ "learning_rate": 1.709922025444329e-05,
2243
+ "loss": 2.6381,
2244
+ "step": 174500
2245
+ },
2246
+ {
2247
+ "epoch": 4.14,
2248
+ "learning_rate": 1.7036082962401744e-05,
2249
+ "loss": 2.6218,
2250
+ "step": 175000
2251
+ },
2252
+ {
2253
+ "epoch": 4.16,
2254
+ "learning_rate": 1.69729456703602e-05,
2255
+ "loss": 2.6406,
2256
+ "step": 175500
2257
+ },
2258
+ {
2259
+ "epoch": 4.17,
2260
+ "learning_rate": 1.6909808378318654e-05,
2261
+ "loss": 2.6366,
2262
+ "step": 176000
2263
+ },
2264
+ {
2265
+ "epoch": 4.18,
2266
+ "learning_rate": 1.6846671086277112e-05,
2267
+ "loss": 2.6445,
2268
+ "step": 176500
2269
+ },
2270
+ {
2271
+ "epoch": 4.19,
2272
+ "learning_rate": 1.6783533794235567e-05,
2273
+ "loss": 2.6649,
2274
+ "step": 177000
2275
+ },
2276
+ {
2277
+ "epoch": 4.2,
2278
+ "learning_rate": 1.6720396502194022e-05,
2279
+ "loss": 2.6351,
2280
+ "step": 177500
2281
+ },
2282
+ {
2283
+ "epoch": 4.21,
2284
+ "learning_rate": 1.6657259210152478e-05,
2285
+ "loss": 2.6261,
2286
+ "step": 178000
2287
+ },
2288
+ {
2289
+ "epoch": 4.23,
2290
+ "learning_rate": 1.6594121918110933e-05,
2291
+ "loss": 2.6413,
2292
+ "step": 178500
2293
+ },
2294
+ {
2295
+ "epoch": 4.24,
2296
+ "learning_rate": 1.6530984626069388e-05,
2297
+ "loss": 2.6418,
2298
+ "step": 179000
2299
+ },
2300
+ {
2301
+ "epoch": 4.25,
2302
+ "learning_rate": 1.6467847334027843e-05,
2303
+ "loss": 2.6263,
2304
+ "step": 179500
2305
+ },
2306
+ {
2307
+ "epoch": 4.26,
2308
+ "learning_rate": 1.64047100419863e-05,
2309
+ "loss": 2.6519,
2310
+ "step": 180000
2311
+ },
2312
+ {
2313
+ "epoch": 4.26,
2314
+ "eval_loss": 3.13765549659729,
2315
+ "eval_runtime": 112.7621,
2316
+ "eval_samples_per_second": 166.474,
2317
+ "eval_steps_per_second": 20.814,
2318
+ "step": 180000
2319
  }
2320
  ],
2321
  "max_steps": 633540,
2322
  "num_train_epochs": 15,
2323
+ "total_flos": 3.8174350461696e+16,
2324
  "trial_name": null,
2325
  "trial_params": null
2326
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f42dbd9b35415ba34033be719191148beaa9e62794b2440aac238f33a11f2b47
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58842975ca799b06b4fa71b7c90228c8d715f5a818a53786f1086ad507f6d094
3
  size 510398013