Training in progress, step 49, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 39256456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61c4aef52327b74e276194084cea23c87f6341f1f7b52a3cefd75fcaf48183e5
|
3 |
size 39256456
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 20359034
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:298622a49610be57349874e2523642fc0a004bb3569dff87441b2022830a345f
|
3 |
size 20359034
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25704ee5478cd4521ee5db3e9e14050e9c72edebe1088acf05e49ba2bb8ed7fb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -871,6 +871,24 @@
|
|
871 |
"rewards/margins": 0.01552429050207138,
|
872 |
"rewards/rejected": -0.22111549973487854,
|
873 |
"step": 48
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
874 |
}
|
875 |
],
|
876 |
"logging_steps": 1,
|
@@ -885,7 +903,7 @@
|
|
885 |
"should_evaluate": false,
|
886 |
"should_log": false,
|
887 |
"should_save": true,
|
888 |
-
"should_training_stop":
|
889 |
},
|
890 |
"attributes": {}
|
891 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9990442816183498,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 49,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
871 |
"rewards/margins": 0.01552429050207138,
|
872 |
"rewards/rejected": -0.22111549973487854,
|
873 |
"step": 48
|
874 |
+
},
|
875 |
+
{
|
876 |
+
"epoch": 0.9990442816183498,
|
877 |
+
"grad_norm": 4.174467086791992,
|
878 |
+
"learning_rate": 0.0,
|
879 |
+
"log_odds_chosen": 0.19606728851795197,
|
880 |
+
"log_odds_ratio": -0.6126885414123535,
|
881 |
+
"logits/chosen": -21.158491134643555,
|
882 |
+
"logits/rejected": -21.405784606933594,
|
883 |
+
"logps/chosen": -2.048922538757324,
|
884 |
+
"logps/rejected": -2.2221457958221436,
|
885 |
+
"loss": 2.4451,
|
886 |
+
"nll_loss": 2.3838396072387695,
|
887 |
+
"rewards/accuracies": 0.71875,
|
888 |
+
"rewards/chosen": -0.20489224791526794,
|
889 |
+
"rewards/margins": 0.017322326079010963,
|
890 |
+
"rewards/rejected": -0.22221459448337555,
|
891 |
+
"step": 49
|
892 |
}
|
893 |
],
|
894 |
"logging_steps": 1,
|
|
|
903 |
"should_evaluate": false,
|
904 |
"should_log": false,
|
905 |
"should_save": true,
|
906 |
+
"should_training_stop": true
|
907 |
},
|
908 |
"attributes": {}
|
909 |
}
|