k-r-l committed on
Commit
b93fb83
1 Parent(s): 19def9e

Training in progress, step 24, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:043a24487f95798eb31e2e434c8f075b49656a359854f8ff738f5f3c15acc083
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb81ac1b964ed31e4b1ff1c64a37506b2a96efef0fc70b09b389a030d16c3f4b
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e447628aef17c528b9e6d3f3fb57f7d1f50cabfad42fff8d7b14821007d5cc8
3
  size 42545748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7cb67b858d82ecd307231b42f064860cff65961f05b8e65aa405c5eb6d6c6b8
3
  size 42545748
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:465463eb403cb020ef1ee281a98625c53f7ae06bb22cde52270f71fd23ceb4dc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63912703bb01fa9210978a44b98730cfdaa0a2f521b98b15df7dae272abb8ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.40777317617075504,
5
  "eval_steps": 500,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -367,6 +367,78 @@
367
  "rewards/margins": 0.018004287034273148,
368
  "rewards/rejected": -0.10551030933856964,
369
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  }
371
  ],
372
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.489327811404906,
5
  "eval_steps": 500,
6
+ "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
367
  "rewards/margins": 0.018004287034273148,
368
  "rewards/rejected": -0.10551030933856964,
369
  "step": 20
370
+ },
371
+ {
372
+ "epoch": 0.4281618349792928,
373
+ "grad_norm": 1.4550942182540894,
374
+ "learning_rate": 7.17948717948718e-05,
375
+ "log_odds_chosen": 0.2803484797477722,
376
+ "log_odds_ratio": -0.5891110897064209,
377
+ "logits/chosen": -2.177445888519287,
378
+ "logits/rejected": -2.1862730979919434,
379
+ "logps/chosen": -0.8740922808647156,
380
+ "logps/rejected": -1.0376415252685547,
381
+ "loss": 1.184,
382
+ "nll_loss": 1.125113606452942,
383
+ "rewards/accuracies": 0.6875,
384
+ "rewards/chosen": -0.08740923553705215,
385
+ "rewards/margins": 0.01635492593050003,
386
+ "rewards/rejected": -0.10376415401697159,
387
+ "step": 21
388
+ },
389
+ {
390
+ "epoch": 0.4485504937878305,
391
+ "grad_norm": 1.5131646394729614,
392
+ "learning_rate": 6.923076923076924e-05,
393
+ "log_odds_chosen": 0.3196752965450287,
394
+ "log_odds_ratio": -0.5673432350158691,
395
+ "logits/chosen": -2.139277458190918,
396
+ "logits/rejected": -2.1643970012664795,
397
+ "logps/chosen": -0.8622347116470337,
398
+ "logps/rejected": -1.060903549194336,
399
+ "loss": 1.1375,
400
+ "nll_loss": 1.0807565450668335,
401
+ "rewards/accuracies": 0.78125,
402
+ "rewards/chosen": -0.08622346818447113,
403
+ "rewards/margins": 0.019866881892085075,
404
+ "rewards/rejected": -0.10609035938978195,
405
+ "step": 22
406
+ },
407
+ {
408
+ "epoch": 0.46893915259636826,
409
+ "grad_norm": 1.7129428386688232,
410
+ "learning_rate": 6.666666666666667e-05,
411
+ "log_odds_chosen": 0.3558296263217926,
412
+ "log_odds_ratio": -0.551045298576355,
413
+ "logits/chosen": -2.1384575366973877,
414
+ "logits/rejected": -2.1461870670318604,
415
+ "logps/chosen": -0.8587465286254883,
416
+ "logps/rejected": -1.0661779642105103,
417
+ "loss": 1.1327,
418
+ "nll_loss": 1.0775768756866455,
419
+ "rewards/accuracies": 0.8125,
420
+ "rewards/chosen": -0.08587465435266495,
421
+ "rewards/margins": 0.02074313722550869,
422
+ "rewards/rejected": -0.10661779344081879,
423
+ "step": 23
424
+ },
425
+ {
426
+ "epoch": 0.489327811404906,
427
+ "grad_norm": 1.7440029382705688,
428
+ "learning_rate": 6.410256410256412e-05,
429
+ "log_odds_chosen": 0.32858026027679443,
430
+ "log_odds_ratio": -0.5619024038314819,
431
+ "logits/chosen": -2.1546478271484375,
432
+ "logits/rejected": -2.1749908924102783,
433
+ "logps/chosen": -0.835049033164978,
434
+ "logps/rejected": -1.0198912620544434,
435
+ "loss": 1.1057,
436
+ "nll_loss": 1.0495383739471436,
437
+ "rewards/accuracies": 0.78125,
438
+ "rewards/chosen": -0.08350490033626556,
439
+ "rewards/margins": 0.01848422922194004,
440
+ "rewards/rejected": -0.10198913514614105,
441
+ "step": 24
442
  }
443
  ],
444
  "logging_steps": 1,