Training in progress, step 18, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13648432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c1c2f9eb3807f15e5202c65322a8f3cfb134e7beab87fc2ec60ec7ce25d0220
|
3 |
size 13648432
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7309882
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d199b5dd209fd16b98226e0bfe983b172dc1010aece87d4e191d13daf3612235
|
3 |
size 7309882
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:523fdd0b639a62e85034051de84a610796befa0fd6e94d6a020f84859b31247b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -313,6 +313,24 @@
|
|
313 |
"rewards/margins": 0.011046944186091423,
|
314 |
"rewards/rejected": -0.24714939296245575,
|
315 |
"step": 17
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
}
|
317 |
],
|
318 |
"logging_steps": 1,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.18343949044585986,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 18,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
313 |
"rewards/margins": 0.011046944186091423,
|
314 |
"rewards/rejected": -0.24714939296245575,
|
315 |
"step": 17
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"epoch": 0.18343949044585986,
|
319 |
+
"grad_norm": 2.0416226387023926,
|
320 |
+
"learning_rate": 9.387755102040817e-05,
|
321 |
+
"log_odds_chosen": 0.1555405557155609,
|
322 |
+
"log_odds_ratio": -0.6227366328239441,
|
323 |
+
"logits/chosen": -0.586774468421936,
|
324 |
+
"logits/rejected": -0.6310256123542786,
|
325 |
+
"logps/chosen": -2.2520110607147217,
|
326 |
+
"logps/rejected": -2.390634775161743,
|
327 |
+
"loss": 2.4749,
|
328 |
+
"nll_loss": 2.412618637084961,
|
329 |
+
"rewards/accuracies": 0.84375,
|
330 |
+
"rewards/chosen": -0.22520111501216888,
|
331 |
+
"rewards/margins": 0.013862377032637596,
|
332 |
+
"rewards/rejected": -0.23906347155570984,
|
333 |
+
"step": 18
|
334 |
}
|
335 |
],
|
336 |
"logging_steps": 1,
|