|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.971563981042654, |
|
"eval_steps": 128, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018957345971563982, |
|
"grad_norm": 66.10237426067714, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": 117.53560638427734, |
|
"logits/rejected": 126.8960952758789, |
|
"logps/chosen": -335.40118408203125, |
|
"logps/rejected": -439.16552734375, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 65.8363279797723, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": 135.0050811767578, |
|
"logits/rejected": 138.34999084472656, |
|
"logps/chosen": -396.04180908203125, |
|
"logps/rejected": -440.06195068359375, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": -0.0022641660179942846, |
|
"rewards/margins": 0.03446006774902344, |
|
"rewards/rejected": -0.03672423213720322, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 30.020665129557617, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": 123.14253234863281, |
|
"logits/rejected": 126.7535629272461, |
|
"logps/chosen": -354.2257995605469, |
|
"logps/rejected": -407.9169006347656, |
|
"loss": 0.4121, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9093106985092163, |
|
"rewards/margins": 0.3055870234966278, |
|
"rewards/rejected": 0.6037237644195557, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 24.190229582222074, |
|
"learning_rate": 4.5025027361734613e-07, |
|
"logits/chosen": 145.62486267089844, |
|
"logits/rejected": 138.91897583007812, |
|
"logps/chosen": -376.835205078125, |
|
"logps/rejected": -432.81298828125, |
|
"loss": 0.3514, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7815009355545044, |
|
"rewards/margins": 1.4220384359359741, |
|
"rewards/rejected": -0.640537440776825, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 24.559258380924415, |
|
"learning_rate": 3.893311157806091e-07, |
|
"logits/chosen": 134.8649139404297, |
|
"logits/rejected": 123.98270416259766, |
|
"logps/chosen": -324.2713317871094, |
|
"logps/rejected": -366.3738098144531, |
|
"loss": 0.3442, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.4961439371109009, |
|
"rewards/margins": 1.9210717678070068, |
|
"rewards/rejected": -0.42492780089378357, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 25.998652882986566, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 153.93051147460938, |
|
"logits/rejected": 157.34010314941406, |
|
"logps/chosen": -383.385009765625, |
|
"logps/rejected": -484.5369567871094, |
|
"loss": 0.3087, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.7879406213760376, |
|
"rewards/margins": 2.1984703540802, |
|
"rewards/rejected": -0.4105294644832611, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 20.98032293493931, |
|
"learning_rate": 2.2891223348923882e-07, |
|
"logits/chosen": 145.45932006835938, |
|
"logits/rejected": 149.6053466796875, |
|
"logps/chosen": -359.0306396484375, |
|
"logps/rejected": -456.782470703125, |
|
"loss": 0.257, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.9487521648406982, |
|
"rewards/margins": 2.969599723815918, |
|
"rewards/rejected": -1.0208473205566406, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 19.65205364182723, |
|
"learning_rate": 1.4754491880085317e-07, |
|
"logits/chosen": 140.3638458251953, |
|
"logits/rejected": 141.1572265625, |
|
"logps/chosen": -328.19830322265625, |
|
"logps/rejected": -429.80487060546875, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.9472763538360596, |
|
"rewards/margins": 2.761829137802124, |
|
"rewards/rejected": -0.8145527839660645, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 19.33483383354362, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 127.70585632324219, |
|
"logits/rejected": 143.24069213867188, |
|
"logps/chosen": -311.0325622558594, |
|
"logps/rejected": -434.9661560058594, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.130922794342041, |
|
"rewards/margins": 3.2173447608947754, |
|
"rewards/rejected": -1.0864222049713135, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 21.62644101836202, |
|
"learning_rate": 2.7440387297912122e-08, |
|
"logits/chosen": 128.0491180419922, |
|
"logits/rejected": 140.35018920898438, |
|
"logps/chosen": -341.66192626953125, |
|
"logps/rejected": -459.8998107910156, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 2.238457202911377, |
|
"rewards/margins": 3.3493576049804688, |
|
"rewards/rejected": -1.110900640487671, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 19.399744221303617, |
|
"learning_rate": 2.27878296044029e-09, |
|
"logits/chosen": 134.3742218017578, |
|
"logits/rejected": 134.44503784179688, |
|
"logps/chosen": -334.2799377441406, |
|
"logps/rejected": -428.43621826171875, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 2.384758472442627, |
|
"rewards/margins": 2.9683709144592285, |
|
"rewards/rejected": -0.5836124420166016, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.971563981042654, |
|
"step": 104, |
|
"total_flos": 0.0, |
|
"train_loss": 0.29816230271871275, |
|
"train_runtime": 2165.9381, |
|
"train_samples_per_second": 6.233, |
|
"train_steps_per_second": 0.048 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|