k-r-l commited on
Commit
431c704
1 Parent(s): f50cdf5

Training in progress, step 8, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afe86c1030d85f487dbaa747f3184d5a685096b20446b3a5c415a5404c51be57
3
  size 13648432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8398c0e5bddec9eb883e0399f65b755d78d063f3b9ecbe58f175f44f364cc7e9
3
  size 13648432
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73437178095b577cbb6ccd45b95b4e2591fd14a3e216d879675222d8b32be866
3
  size 7309882
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35967646370f39bc8adcdeb6bb4be3fa34ef34aa59b57e6a7303e7a39ef1094c
3
  size 7309882
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5371e347fa271c949de68f5ea0cd89d44be17efe7025e824d262e6bcd68df751
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:600149e975d1bacd170387afa159597e660c1a6e94a6b29b69de17c6162d48a4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07133757961783439,
5
  "eval_steps": 500,
6
- "global_step": 7,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -133,6 +133,24 @@
133
  "rewards/margins": 0.003080902621150017,
134
  "rewards/rejected": -0.2951011657714844,
135
  "step": 7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
137
  ],
138
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08152866242038216,
5
  "eval_steps": 500,
6
+ "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
133
  "rewards/margins": 0.003080902621150017,
134
  "rewards/rejected": -0.2951011657714844,
135
  "step": 7
136
+ },
137
+ {
138
+ "epoch": 0.08152866242038216,
139
+ "grad_norm": 1.8741809129714966,
140
+ "learning_rate": 9.727891156462585e-05,
141
+ "log_odds_chosen": 0.09863043576478958,
142
+ "log_odds_ratio": -0.6496734619140625,
143
+ "logits/chosen": -0.5901607275009155,
144
+ "logits/rejected": -0.5661267042160034,
145
+ "logps/chosen": -2.7296760082244873,
146
+ "logps/rejected": -2.8212223052978516,
147
+ "loss": 3.1928,
148
+ "nll_loss": 3.1278655529022217,
149
+ "rewards/accuracies": 0.65625,
150
+ "rewards/chosen": -0.2729676365852356,
151
+ "rewards/margins": 0.009154656901955605,
152
+ "rewards/rejected": -0.28212225437164307,
153
+ "step": 8
154
  }
155
  ],
156
  "logging_steps": 1,