|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 157, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006369426751592357, |
|
"grad_norm": 2514.601414684904, |
|
"learning_rate": 3.125e-08, |
|
"logits/generated": -2.661752223968506, |
|
"logits/real": -2.483980894088745, |
|
"logps/generated": -429.17132568359375, |
|
"logps/real": -342.051025390625, |
|
"loss": 1.3612, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06369426751592357, |
|
"grad_norm": 0.31574006570864244, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/generated": -2.668369770050049, |
|
"logits/real": -2.424091339111328, |
|
"logps/generated": -450.4893798828125, |
|
"logps/real": -144.69952392578125, |
|
"loss": 0.4008, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/generated": -2.1318063735961914, |
|
"rewards/margins": 21.866958618164062, |
|
"rewards/real": 19.735153198242188, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12738853503184713, |
|
"grad_norm": 0.005520241230176221, |
|
"learning_rate": 4.858156028368794e-07, |
|
"logits/generated": -2.692908763885498, |
|
"logits/real": -2.329023599624634, |
|
"logps/generated": -478.9815368652344, |
|
"logps/real": -0.014682354405522346, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.981024742126465, |
|
"rewards/margins": 39.18465805053711, |
|
"rewards/real": 34.203636169433594, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1910828025477707, |
|
"grad_norm": 0.002789639064368334, |
|
"learning_rate": 4.50354609929078e-07, |
|
"logits/generated": -2.705698013305664, |
|
"logits/real": -2.342294216156006, |
|
"logps/generated": -486.1914978027344, |
|
"logps/real": -0.004650969058275223, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.702020645141602, |
|
"rewards/margins": 39.90666198730469, |
|
"rewards/real": 34.20463943481445, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25477707006369427, |
|
"grad_norm": 0.0019297231936813803, |
|
"learning_rate": 4.148936170212766e-07, |
|
"logits/generated": -2.7122185230255127, |
|
"logits/real": -2.348639965057373, |
|
"logps/generated": -489.88970947265625, |
|
"logps/real": -0.002834505634382367, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.071843147277832, |
|
"rewards/margins": 40.27666473388672, |
|
"rewards/real": 34.2048225402832, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3184713375796178, |
|
"grad_norm": 0.0014726852980064813, |
|
"learning_rate": 3.7943262411347514e-07, |
|
"logits/generated": -2.71730375289917, |
|
"logits/real": -2.3547444343566895, |
|
"logps/generated": -492.354248046875, |
|
"logps/real": -0.0020735759753733873, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.318298816680908, |
|
"rewards/margins": 40.523193359375, |
|
"rewards/real": 34.20489501953125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3821656050955414, |
|
"grad_norm": 0.0012108227405915682, |
|
"learning_rate": 3.4397163120567375e-07, |
|
"logits/generated": -2.7216479778289795, |
|
"logits/real": -2.359346866607666, |
|
"logps/generated": -493.62725830078125, |
|
"logps/real": -0.0016389258671551943, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.445591926574707, |
|
"rewards/margins": 40.65053176879883, |
|
"rewards/real": 34.20494079589844, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.445859872611465, |
|
"grad_norm": 0.0010092968797445821, |
|
"learning_rate": 3.085106382978723e-07, |
|
"logits/generated": -2.7243549823760986, |
|
"logits/real": -2.3620219230651855, |
|
"logps/generated": -494.82586669921875, |
|
"logps/real": -0.0013623478589579463, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.565457820892334, |
|
"rewards/margins": 40.77042770385742, |
|
"rewards/real": 34.20496368408203, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5095541401273885, |
|
"grad_norm": 0.0008924667007579917, |
|
"learning_rate": 2.730496453900709e-07, |
|
"logits/generated": -2.726926565170288, |
|
"logits/real": -2.362954616546631, |
|
"logps/generated": -495.93377685546875, |
|
"logps/real": -0.0011800352949649096, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.676251411437988, |
|
"rewards/margins": 40.88123321533203, |
|
"rewards/real": 34.204986572265625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5732484076433121, |
|
"grad_norm": 0.0008155704567801252, |
|
"learning_rate": 2.375886524822695e-07, |
|
"logits/generated": -2.72874116897583, |
|
"logits/real": -2.366927146911621, |
|
"logps/generated": -496.6890563964844, |
|
"logps/real": -0.0010528427083045244, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.751776218414307, |
|
"rewards/margins": 40.95677185058594, |
|
"rewards/real": 34.204994201660156, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6369426751592356, |
|
"grad_norm": 0.000725474761841383, |
|
"learning_rate": 2.0212765957446807e-07, |
|
"logits/generated": -2.7310385704040527, |
|
"logits/real": -2.369218349456787, |
|
"logps/generated": -497.72625732421875, |
|
"logps/real": -0.000954283110331744, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.855503082275391, |
|
"rewards/margins": 41.06051254272461, |
|
"rewards/real": 34.20500946044922, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6369426751592356, |
|
"eval_logits/generated": -2.661693572998047, |
|
"eval_logits/real": -2.609511137008667, |
|
"eval_logps/generated": -259.66143798828125, |
|
"eval_logps/real": -210.63931274414062, |
|
"eval_loss": 0.9619492292404175, |
|
"eval_rewards/accuracies": 0.890625, |
|
"eval_rewards/generated": -0.14410093426704407, |
|
"eval_rewards/margins": 0.3460058867931366, |
|
"eval_rewards/real": 0.20190495252609253, |
|
"eval_runtime": 37.6272, |
|
"eval_samples_per_second": 13.288, |
|
"eval_steps_per_second": 0.425, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7006369426751592, |
|
"grad_norm": 0.0006953242722311237, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -2.732168436050415, |
|
"logits/real": -2.370328426361084, |
|
"logps/generated": -497.7915954589844, |
|
"logps/real": -0.0008821273222565651, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.86203145980835, |
|
"rewards/margins": 41.06704330444336, |
|
"rewards/real": 34.20501708984375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7643312101910829, |
|
"grad_norm": 0.0006463359840785572, |
|
"learning_rate": 1.3120567375886523e-07, |
|
"logits/generated": -2.7338597774505615, |
|
"logits/real": -2.3714773654937744, |
|
"logps/generated": -498.8688049316406, |
|
"logps/real": -0.0008279000176116824, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.969751834869385, |
|
"rewards/margins": 41.174774169921875, |
|
"rewards/real": 34.205020904541016, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8280254777070064, |
|
"grad_norm": 0.0006205498614208318, |
|
"learning_rate": 9.574468085106382e-08, |
|
"logits/generated": -2.7348275184631348, |
|
"logits/real": -2.3729214668273926, |
|
"logps/generated": -498.8643493652344, |
|
"logps/real": -0.0007825180655345321, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.969304084777832, |
|
"rewards/margins": 41.17433547973633, |
|
"rewards/real": 34.20502471923828, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.89171974522293, |
|
"grad_norm": 0.0005942730744268325, |
|
"learning_rate": 6.02836879432624e-08, |
|
"logits/generated": -2.7360432147979736, |
|
"logits/real": -2.374084949493408, |
|
"logps/generated": -499.4146423339844, |
|
"logps/real": -0.0007567574502900243, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.024338722229004, |
|
"rewards/margins": 41.229366302490234, |
|
"rewards/real": 34.20502471923828, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9554140127388535, |
|
"grad_norm": 0.0005956400007297931, |
|
"learning_rate": 2.4822695035460993e-08, |
|
"logits/generated": -2.73626708984375, |
|
"logits/real": -2.374666690826416, |
|
"logps/generated": -499.5492248535156, |
|
"logps/real": -0.0007398539455607533, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.0377936363220215, |
|
"rewards/margins": 41.242820739746094, |
|
"rewards/real": 34.20502471923828, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 157, |
|
"total_flos": 0.0, |
|
"train_loss": 0.03164779691052703, |
|
"train_runtime": 1178.5993, |
|
"train_samples_per_second": 4.242, |
|
"train_steps_per_second": 0.133 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 157, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|