{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 200, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 96.80832392576436, "learning_rate": 7.142857142857142e-08, "logits/generated": -1.177710771560669, "logits/real": -0.5424066185951233, "logps/generated": -206.68331909179688, "logps/real": -268.4350280761719, "loss": 0.956, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.16, "grad_norm": 186.85535273079182, "learning_rate": 4.727272727272727e-07, "logits/generated": -0.9581692218780518, "logits/real": -0.7258952260017395, "logps/generated": -263.182373046875, "logps/real": -268.8551025390625, "loss": 0.9057, "rewards/accuracies": 0.5972222089767456, "rewards/generated": 0.11249147355556488, "rewards/margins": 0.11431904882192612, "rewards/real": 0.22681055963039398, "step": 10 }, { "epoch": 0.32, "grad_norm": 118.11338598390404, "learning_rate": 3.818181818181818e-07, "logits/generated": -0.891255259513855, "logits/real": -0.6647359132766724, "logps/generated": -264.0565185546875, "logps/real": -256.42901611328125, "loss": 0.785, "rewards/accuracies": 0.7250000238418579, "rewards/generated": 0.7130780220031738, "rewards/margins": 0.5250438451766968, "rewards/real": 1.2381219863891602, "step": 20 }, { "epoch": 0.48, "grad_norm": 89.31545609503772, "learning_rate": 2.909090909090909e-07, "logits/generated": -0.8781732320785522, "logits/real": -0.5925976634025574, "logps/generated": -256.00799560546875, "logps/real": -247.38980102539062, "loss": 0.7838, "rewards/accuracies": 0.75, "rewards/generated": 1.1103136539459229, "rewards/margins": 0.712864875793457, "rewards/real": 1.8231786489486694, "step": 30 }, { "epoch": 0.64, "grad_norm": 88.34237989806086, "learning_rate": 2e-07, "logits/generated": -0.8793425559997559, "logits/real": -0.6354281306266785, "logps/generated": -263.5761413574219, "logps/real": -255.0084686279297, "loss": 0.7397, "rewards/accuracies": 0.7749999761581421, "rewards/generated": 1.0507025718688965, "rewards/margins": 0.881763756275177, "rewards/real": 1.9324661493301392, "step": 40 }, { "epoch": 0.8, "grad_norm": 91.22613773952635, "learning_rate": 1.0909090909090908e-07, "logits/generated": -0.8102799654006958, "logits/real": -0.5707312822341919, "logps/generated": -248.6565704345703, "logps/real": -260.7277526855469, "loss": 0.7153, "rewards/accuracies": 0.800000011920929, "rewards/generated": 1.1541557312011719, "rewards/margins": 0.9233818054199219, "rewards/real": 2.0775375366210938, "step": 50 }, { "epoch": 0.96, "grad_norm": 86.27514871900985, "learning_rate": 1.818181818181818e-08, "logits/generated": -0.9142637252807617, "logits/real": -0.6623004078865051, "logps/generated": -254.97494506835938, "logps/real": -244.65640258789062, "loss": 0.7192, "rewards/accuracies": 0.8125, "rewards/generated": 1.215023159980774, "rewards/margins": 0.8442209959030151, "rewards/real": 2.059244155883789, "step": 60 }, { "epoch": 0.992, "step": 62, "total_flos": 0.0, "train_loss": 0.770801761457997, "train_runtime": 772.1202, "train_samples_per_second": 2.585, "train_steps_per_second": 0.08 } ], "logging_steps": 10, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }