{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.99581589958159, "eval_steps": 500, "global_step": 119, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.166666666666666e-08, "logits/chosen": -2.716510057449341, "logits/rejected": -2.7105798721313477, "logps/chosen": -175.55441284179688, "logps/rejected": -222.94068908691406, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.7519237995147705, "logits/rejected": -2.716820001602173, "logps/chosen": -186.0233154296875, "logps/rejected": -250.5030059814453, "loss": 0.6918, "rewards/accuracies": 0.5347222089767456, "rewards/chosen": -0.003977265674620867, "rewards/margins": 0.008249405771493912, "rewards/rejected": -0.012226670980453491, "step": 10 }, { "epoch": 0.17, "learning_rate": 4.931352528237397e-07, "logits/chosen": -2.656189441680908, "logits/rejected": -2.6212830543518066, "logps/chosen": -247.41494750976562, "logps/rejected": -254.1903533935547, "loss": 0.691, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.2717934250831604, "rewards/margins": 0.07151587307453156, "rewards/rejected": -0.34330934286117554, "step": 20 }, { "epoch": 0.25, "learning_rate": 4.658920803689553e-07, "logits/chosen": -2.602332592010498, "logits/rejected": -2.5776302814483643, "logps/chosen": -248.0190887451172, "logps/rejected": -270.84722900390625, "loss": 0.6919, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.442482054233551, "rewards/margins": 0.07939548790454865, "rewards/rejected": -0.5218775272369385, "step": 30 }, { "epoch": 0.33, "learning_rate": 4.201712553872657e-07, "logits/chosen": -2.5487563610076904, "logits/rejected": -2.5239639282226562, "logps/chosen": -244.3371124267578, "logps/rejected": -259.01324462890625, "loss": 0.6885, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.2263740599155426, "rewards/margins": 0.041686661541461945, "rewards/rejected": -0.26806074380874634, "step": 40 }, { "epoch": 0.42, "learning_rate": 3.598859066780754e-07, "logits/chosen": -2.5481457710266113, "logits/rejected": -2.51816463470459, "logps/chosen": -246.34762573242188, "logps/rejected": -275.49151611328125, "loss": 0.6676, "rewards/accuracies": 0.625, "rewards/chosen": -0.2860008180141449, "rewards/margins": 0.09634693711996078, "rewards/rejected": -0.3823477625846863, "step": 50 }, { "epoch": 0.5, "learning_rate": 2.9019570347986706e-07, "logits/chosen": -2.431131362915039, "logits/rejected": -2.448798656463623, "logps/chosen": -269.74493408203125, "logps/rejected": -303.2239990234375, "loss": 0.6669, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.5165982246398926, "rewards/margins": 0.1851460039615631, "rewards/rejected": -0.7017441987991333, "step": 60 }, { "epoch": 0.59, "learning_rate": 2.1706525253979534e-07, "logits/chosen": -2.425497055053711, "logits/rejected": -2.391167402267456, "logps/chosen": -265.3540344238281, "logps/rejected": -305.040283203125, "loss": 0.6602, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5042034983634949, "rewards/margins": 0.13428547978401184, "rewards/rejected": -0.6384889483451843, "step": 70 }, { "epoch": 0.67, "learning_rate": 1.4675360263490295e-07, "logits/chosen": -2.4082672595977783, "logits/rejected": -2.3618528842926025, "logps/chosen": -234.425537109375, "logps/rejected": -287.005859375, "loss": 0.6639, "rewards/accuracies": 0.5625, "rewards/chosen": -0.44368410110473633, "rewards/margins": 0.15288691222667694, "rewards/rejected": -0.5965710282325745, "step": 80 }, { "epoch": 0.75, "learning_rate": 8.527854855097224e-08, "logits/chosen": -2.4282336235046387, "logits/rejected": -2.3792128562927246, "logps/chosen": -274.50714111328125, "logps/rejected": -271.96221923828125, "loss": 0.6585, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.41591668128967285, "rewards/margins": 0.14339400827884674, "rewards/rejected": -0.5593106746673584, "step": 90 }, { "epoch": 0.84, "learning_rate": 3.790158337517127e-08, "logits/chosen": -2.379441738128662, "logits/rejected": -2.3764429092407227, "logps/chosen": -241.2744598388672, "logps/rejected": -290.3218688964844, "loss": 0.6502, "rewards/accuracies": 0.59375, "rewards/chosen": -0.41449299454689026, "rewards/margins": 0.1260414719581604, "rewards/rejected": -0.5405344367027283, "step": 100 }, { "epoch": 0.92, "learning_rate": 8.677580722139671e-09, "logits/chosen": -2.385240077972412, "logits/rejected": -2.4066944122314453, "logps/chosen": -242.5824737548828, "logps/rejected": -280.25323486328125, "loss": 0.6612, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.47699427604675293, "rewards/margins": 0.030102651566267014, "rewards/rejected": -0.5070968866348267, "step": 110 }, { "epoch": 1.0, "step": 119, "total_flos": 0.0, "train_loss": 0.672575786334126, "train_runtime": 1976.2133, "train_samples_per_second": 7.733, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 119, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }