{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010416666666666666, "grad_norm": 23.252944189766428, "learning_rate": 8.639811904061041e-08, "logits/chosen": -2.590585231781006, "logits/rejected": -2.5664222240448, "logps/chosen": -80.29847717285156, "logps/rejected": -53.10200881958008, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.10416666666666667, "grad_norm": 21.182382477045586, "learning_rate": 8.639811904061041e-07, "logits/chosen": -2.5559909343719482, "logits/rejected": -2.5379226207733154, "logps/chosen": -87.82003021240234, "logps/rejected": -80.9332046508789, "loss": 0.6931, "rewards/accuracies": 0.2013888955116272, "rewards/chosen": 0.00335866492241621, "rewards/margins": -0.0003586374514270574, "rewards/rejected": 0.003717302344739437, "step": 10 }, { "epoch": 0.20833333333333334, "grad_norm": 17.233720974528225, "learning_rate": 7.635182612891153e-07, "logits/chosen": -2.5788445472717285, "logits/rejected": -2.528242588043213, "logps/chosen": -101.24139404296875, "logps/rejected": -88.24673461914062, "loss": 0.6743, "rewards/accuracies": 0.34375, "rewards/chosen": 0.07447633892297745, "rewards/margins": 0.016661062836647034, "rewards/rejected": 0.057815272361040115, "step": 20 }, { "epoch": 0.3125, "grad_norm": 25.6903065091126, "learning_rate": 6.630553321721264e-07, "logits/chosen": -2.4120750427246094, "logits/rejected": -2.4285130500793457, "logps/chosen": -67.22891235351562, "logps/rejected": -77.24456787109375, "loss": 0.6697, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": 0.007355662528425455, "rewards/margins": 0.08921505510807037, "rewards/rejected": -0.08185939490795135, "step": 30 }, { "epoch": 0.4166666666666667, "grad_norm": 19.22554951858603, "learning_rate": 5.625924030551376e-07, "logits/chosen": -2.4893252849578857, "logits/rejected": -2.4751393795013428, "logps/chosen": -74.36286163330078, "logps/rejected": -75.44730377197266, "loss": 0.6618, "rewards/accuracies": 0.28125, "rewards/chosen": -0.07824570685625076, "rewards/margins": 0.08888493478298187, "rewards/rejected": -0.16713064908981323, "step": 40 }, { "epoch": 0.5208333333333334, "grad_norm": 19.08251678108842, "learning_rate": 4.6212947393814867e-07, "logits/chosen": -2.4168150424957275, "logits/rejected": -2.4291889667510986, "logps/chosen": -52.590057373046875, "logps/rejected": -62.784461975097656, "loss": 0.6552, "rewards/accuracies": 0.23125000298023224, "rewards/chosen": -0.059627026319503784, "rewards/margins": 0.07718921452760696, "rewards/rejected": -0.13681624829769135, "step": 50 }, { "epoch": 0.625, "grad_norm": 21.66166836395161, "learning_rate": 3.6166654482115984e-07, "logits/chosen": -2.489243984222412, "logits/rejected": -2.4673056602478027, "logps/chosen": -82.04798889160156, "logps/rejected": -87.74610137939453, "loss": 0.6569, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.10516528785228729, "rewards/margins": 0.19169361889362335, "rewards/rejected": -0.29685890674591064, "step": 60 }, { "epoch": 0.7291666666666666, "grad_norm": 19.133621385765668, "learning_rate": 2.6120361570417096e-07, "logits/chosen": -2.452115535736084, "logits/rejected": -2.4336700439453125, "logps/chosen": -96.10133361816406, "logps/rejected": -90.39111328125, "loss": 0.6395, "rewards/accuracies": 0.3125, "rewards/chosen": 0.008597126230597496, "rewards/margins": 0.1830345243215561, "rewards/rejected": -0.17443740367889404, "step": 70 }, { "epoch": 0.8333333333333334, "grad_norm": 34.22517043366701, "learning_rate": 1.6074068658718216e-07, "logits/chosen": -2.4384665489196777, "logits/rejected": -2.3800644874572754, "logps/chosen": -83.1066665649414, "logps/rejected": -85.15121459960938, "loss": 0.6206, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0896192193031311, "rewards/margins": 0.29328036308288574, "rewards/rejected": -0.20366115868091583, "step": 80 }, { "epoch": 0.9375, "grad_norm": 28.270052144663545, "learning_rate": 6.027775747019331e-08, "logits/chosen": -2.3832895755767822, "logits/rejected": -2.3764870166778564, "logps/chosen": -53.896759033203125, "logps/rejected": -69.18147277832031, "loss": 0.6399, "rewards/accuracies": 0.28125, "rewards/chosen": 0.0029330668039619923, "rewards/margins": 0.20463672280311584, "rewards/rejected": -0.20170363783836365, "step": 90 }, { "epoch": 1.0, "step": 96, "total_flos": 0.0, "train_loss": 0.6540692721803983, "train_runtime": 977.0227, "train_samples_per_second": 6.257, "train_steps_per_second": 0.098 } ], "logging_steps": 10, "max_steps": 96, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }