iamnguyen commited on
Commit
141672d
1 Parent(s): 57c1c32

Training in progress, step 64, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:422330c45920b3454b22adb45eb05e35af13896d1bbbcc99089a1dcc51734025
3
  size 1722054072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1dce664c65bb51e096d639575dafc29c2f3fd1714a025d6689ea2fe6339fcb3
3
  size 1722054072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df1854a93e1b3a7db7c26392bc5b8fd50f521da6797c93a8dc8e598cb72798cf
3
  size 863084376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bfa1b6e7f0f7d799285b821f079fc13c29e5892353a0d99aa0b231161962f0b
3
  size 863084376
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:289a7023689774a52b1b13bddd72584da17e89c0b2ac881739dc070a2b1f637f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3535a115dd66ed576001ff6370d23c91e17d7f2564bf842d10adac987c704d2b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0010344186647917829,
5
  "eval_steps": 500,
6
- "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -119,6 +119,342 @@
119
  "learning_rate": 1.0322580645161291e-06,
120
  "loss": 1.618,
121
  "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  }
123
  ],
124
  "logging_steps": 1,
@@ -138,7 +474,7 @@
138
  "attributes": {}
139
  }
140
  },
141
- "total_flos": 1.605109794349056e+16,
142
  "train_batch_size": 4,
143
  "trial_name": null,
144
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0041376746591671315,
5
  "eval_steps": 500,
6
+ "global_step": 64,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
119
  "learning_rate": 1.0322580645161291e-06,
120
  "loss": 1.618,
121
  "step": 16
122
+ },
123
+ {
124
+ "epoch": 0.0010990698313412693,
125
+ "grad_norm": 27.484895706176758,
126
+ "learning_rate": 1.0967741935483872e-06,
127
+ "loss": 1.5973,
128
+ "step": 17
129
+ },
130
+ {
131
+ "epoch": 0.0011637209978907557,
132
+ "grad_norm": 20.07984161376953,
133
+ "learning_rate": 1.1612903225806454e-06,
134
+ "loss": 1.6907,
135
+ "step": 18
136
+ },
137
+ {
138
+ "epoch": 0.0012283721644402422,
139
+ "grad_norm": 14.788183212280273,
140
+ "learning_rate": 1.2258064516129033e-06,
141
+ "loss": 1.6986,
142
+ "step": 19
143
+ },
144
+ {
145
+ "epoch": 0.0012930233309897286,
146
+ "grad_norm": 9.38936996459961,
147
+ "learning_rate": 1.2903225806451614e-06,
148
+ "loss": 1.6164,
149
+ "step": 20
150
+ },
151
+ {
152
+ "epoch": 0.001357674497539215,
153
+ "grad_norm": 8.975388526916504,
154
+ "learning_rate": 1.3548387096774195e-06,
155
+ "loss": 1.5637,
156
+ "step": 21
157
+ },
158
+ {
159
+ "epoch": 0.0014223256640887015,
160
+ "grad_norm": 14.67812442779541,
161
+ "learning_rate": 1.4193548387096776e-06,
162
+ "loss": 1.6869,
163
+ "step": 22
164
+ },
165
+ {
166
+ "epoch": 0.0014869768306381879,
167
+ "grad_norm": 8.56955337524414,
168
+ "learning_rate": 1.4838709677419356e-06,
169
+ "loss": 1.5948,
170
+ "step": 23
171
+ },
172
+ {
173
+ "epoch": 0.0015516279971876743,
174
+ "grad_norm": 5.918207168579102,
175
+ "learning_rate": 1.5483870967741937e-06,
176
+ "loss": 1.61,
177
+ "step": 24
178
+ },
179
+ {
180
+ "epoch": 0.0016162791637371607,
181
+ "grad_norm": 8.165249824523926,
182
+ "learning_rate": 1.6129032258064516e-06,
183
+ "loss": 1.712,
184
+ "step": 25
185
+ },
186
+ {
187
+ "epoch": 0.0016809303302866472,
188
+ "grad_norm": 9.301441192626953,
189
+ "learning_rate": 1.67741935483871e-06,
190
+ "loss": 1.6957,
191
+ "step": 26
192
+ },
193
+ {
194
+ "epoch": 0.0017455814968361336,
195
+ "grad_norm": 7.351085662841797,
196
+ "learning_rate": 1.7419354838709678e-06,
197
+ "loss": 1.5153,
198
+ "step": 27
199
+ },
200
+ {
201
+ "epoch": 0.00181023266338562,
202
+ "grad_norm": 7.347102642059326,
203
+ "learning_rate": 1.8064516129032258e-06,
204
+ "loss": 1.6625,
205
+ "step": 28
206
+ },
207
+ {
208
+ "epoch": 0.0018748838299351065,
209
+ "grad_norm": 7.22580623626709,
210
+ "learning_rate": 1.870967741935484e-06,
211
+ "loss": 1.5632,
212
+ "step": 29
213
+ },
214
+ {
215
+ "epoch": 0.001939534996484593,
216
+ "grad_norm": 5.857529163360596,
217
+ "learning_rate": 1.935483870967742e-06,
218
+ "loss": 1.5339,
219
+ "step": 30
220
+ },
221
+ {
222
+ "epoch": 0.002004186163034079,
223
+ "grad_norm": 7.3882222175598145,
224
+ "learning_rate": 2.0000000000000003e-06,
225
+ "loss": 1.5775,
226
+ "step": 31
227
+ },
228
+ {
229
+ "epoch": 0.0020688373295835658,
230
+ "grad_norm": 6.075866222381592,
231
+ "learning_rate": 2.0645161290322582e-06,
232
+ "loss": 1.55,
233
+ "step": 32
234
+ },
235
+ {
236
+ "epoch": 0.002133488496133052,
237
+ "grad_norm": 4.930643081665039,
238
+ "learning_rate": 2.129032258064516e-06,
239
+ "loss": 1.5525,
240
+ "step": 33
241
+ },
242
+ {
243
+ "epoch": 0.0021981396626825386,
244
+ "grad_norm": 6.426210880279541,
245
+ "learning_rate": 2.1935483870967745e-06,
246
+ "loss": 1.5845,
247
+ "step": 34
248
+ },
249
+ {
250
+ "epoch": 0.002262790829232025,
251
+ "grad_norm": 7.546030521392822,
252
+ "learning_rate": 2.2580645161290324e-06,
253
+ "loss": 1.6313,
254
+ "step": 35
255
+ },
256
+ {
257
+ "epoch": 0.0023274419957815115,
258
+ "grad_norm": 4.881446361541748,
259
+ "learning_rate": 2.3225806451612907e-06,
260
+ "loss": 1.5387,
261
+ "step": 36
262
+ },
263
+ {
264
+ "epoch": 0.0023920931623309977,
265
+ "grad_norm": 5.156312942504883,
266
+ "learning_rate": 2.3870967741935486e-06,
267
+ "loss": 1.4912,
268
+ "step": 37
269
+ },
270
+ {
271
+ "epoch": 0.0024567443288804843,
272
+ "grad_norm": 6.470444202423096,
273
+ "learning_rate": 2.4516129032258066e-06,
274
+ "loss": 1.4706,
275
+ "step": 38
276
+ },
277
+ {
278
+ "epoch": 0.0025213954954299705,
279
+ "grad_norm": 15.289785385131836,
280
+ "learning_rate": 2.5161290322580645e-06,
281
+ "loss": 1.4612,
282
+ "step": 39
283
+ },
284
+ {
285
+ "epoch": 0.002586046661979457,
286
+ "grad_norm": 6.442019939422607,
287
+ "learning_rate": 2.580645161290323e-06,
288
+ "loss": 1.5178,
289
+ "step": 40
290
+ },
291
+ {
292
+ "epoch": 0.0026506978285289434,
293
+ "grad_norm": 5.88471794128418,
294
+ "learning_rate": 2.645161290322581e-06,
295
+ "loss": 1.5095,
296
+ "step": 41
297
+ },
298
+ {
299
+ "epoch": 0.00271534899507843,
300
+ "grad_norm": 7.326111316680908,
301
+ "learning_rate": 2.709677419354839e-06,
302
+ "loss": 1.5718,
303
+ "step": 42
304
+ },
305
+ {
306
+ "epoch": 0.0027800001616279163,
307
+ "grad_norm": 4.946439266204834,
308
+ "learning_rate": 2.774193548387097e-06,
309
+ "loss": 1.5057,
310
+ "step": 43
311
+ },
312
+ {
313
+ "epoch": 0.002844651328177403,
314
+ "grad_norm": 5.956087589263916,
315
+ "learning_rate": 2.8387096774193553e-06,
316
+ "loss": 1.4635,
317
+ "step": 44
318
+ },
319
+ {
320
+ "epoch": 0.002909302494726889,
321
+ "grad_norm": 7.258240222930908,
322
+ "learning_rate": 2.903225806451613e-06,
323
+ "loss": 1.5494,
324
+ "step": 45
325
+ },
326
+ {
327
+ "epoch": 0.0029739536612763758,
328
+ "grad_norm": 5.20070743560791,
329
+ "learning_rate": 2.967741935483871e-06,
330
+ "loss": 1.5329,
331
+ "step": 46
332
+ },
333
+ {
334
+ "epoch": 0.003038604827825862,
335
+ "grad_norm": 6.215978145599365,
336
+ "learning_rate": 3.0322580645161295e-06,
337
+ "loss": 1.4822,
338
+ "step": 47
339
+ },
340
+ {
341
+ "epoch": 0.0031032559943753486,
342
+ "grad_norm": 4.830237865447998,
343
+ "learning_rate": 3.0967741935483874e-06,
344
+ "loss": 1.457,
345
+ "step": 48
346
+ },
347
+ {
348
+ "epoch": 0.003167907160924835,
349
+ "grad_norm": 6.3563971519470215,
350
+ "learning_rate": 3.1612903225806453e-06,
351
+ "loss": 1.4619,
352
+ "step": 49
353
+ },
354
+ {
355
+ "epoch": 0.0032325583274743215,
356
+ "grad_norm": 5.210926532745361,
357
+ "learning_rate": 3.225806451612903e-06,
358
+ "loss": 1.446,
359
+ "step": 50
360
+ },
361
+ {
362
+ "epoch": 0.0032972094940238077,
363
+ "grad_norm": 5.412484169006348,
364
+ "learning_rate": 3.2903225806451615e-06,
365
+ "loss": 1.4154,
366
+ "step": 51
367
+ },
368
+ {
369
+ "epoch": 0.0033618606605732944,
370
+ "grad_norm": 5.500335693359375,
371
+ "learning_rate": 3.35483870967742e-06,
372
+ "loss": 1.4902,
373
+ "step": 52
374
+ },
375
+ {
376
+ "epoch": 0.0034265118271227806,
377
+ "grad_norm": 5.576430320739746,
378
+ "learning_rate": 3.4193548387096773e-06,
379
+ "loss": 1.4931,
380
+ "step": 53
381
+ },
382
+ {
383
+ "epoch": 0.003491162993672267,
384
+ "grad_norm": 4.996302604675293,
385
+ "learning_rate": 3.4838709677419357e-06,
386
+ "loss": 1.374,
387
+ "step": 54
388
+ },
389
+ {
390
+ "epoch": 0.0035558141602217534,
391
+ "grad_norm": 4.846431255340576,
392
+ "learning_rate": 3.548387096774194e-06,
393
+ "loss": 1.5605,
394
+ "step": 55
395
+ },
396
+ {
397
+ "epoch": 0.00362046532677124,
398
+ "grad_norm": 5.039584636688232,
399
+ "learning_rate": 3.6129032258064515e-06,
400
+ "loss": 1.4703,
401
+ "step": 56
402
+ },
403
+ {
404
+ "epoch": 0.0036851164933207263,
405
+ "grad_norm": 4.541802406311035,
406
+ "learning_rate": 3.67741935483871e-06,
407
+ "loss": 1.4888,
408
+ "step": 57
409
+ },
410
+ {
411
+ "epoch": 0.003749767659870213,
412
+ "grad_norm": 4.2027997970581055,
413
+ "learning_rate": 3.741935483870968e-06,
414
+ "loss": 1.5091,
415
+ "step": 58
416
+ },
417
+ {
418
+ "epoch": 0.003814418826419699,
419
+ "grad_norm": 5.916923522949219,
420
+ "learning_rate": 3.8064516129032257e-06,
421
+ "loss": 1.4113,
422
+ "step": 59
423
+ },
424
+ {
425
+ "epoch": 0.003879069992969186,
426
+ "grad_norm": 4.324679374694824,
427
+ "learning_rate": 3.870967741935484e-06,
428
+ "loss": 1.5304,
429
+ "step": 60
430
+ },
431
+ {
432
+ "epoch": 0.0039437211595186724,
433
+ "grad_norm": 4.319639205932617,
434
+ "learning_rate": 3.935483870967742e-06,
435
+ "loss": 1.4203,
436
+ "step": 61
437
+ },
438
+ {
439
+ "epoch": 0.004008372326068158,
440
+ "grad_norm": 5.848201274871826,
441
+ "learning_rate": 4.000000000000001e-06,
442
+ "loss": 1.4394,
443
+ "step": 62
444
+ },
445
+ {
446
+ "epoch": 0.004073023492617645,
447
+ "grad_norm": 5.865200042724609,
448
+ "learning_rate": 4.064516129032259e-06,
449
+ "loss": 1.4537,
450
+ "step": 63
451
+ },
452
+ {
453
+ "epoch": 0.0041376746591671315,
454
+ "grad_norm": 4.825560569763184,
455
+ "learning_rate": 4.1290322580645165e-06,
456
+ "loss": 1.4732,
457
+ "step": 64
458
  }
459
  ],
460
  "logging_steps": 1,
 
474
  "attributes": {}
475
  }
476
  },
477
+ "total_flos": 6.80601377625047e+16,
478
  "train_batch_size": 4,
479
  "trial_name": null,
480
  "trial_params": null