{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.99581589958159,
  "eval_steps": 500,
  "global_step": 119,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.166666666666666e-08,
      "logits/chosen": -2.716510057449341,
      "logits/rejected": -2.7105798721313477,
      "logps/chosen": -175.55441284179688,
      "logps/rejected": -222.94068908691406,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.7519237995147705,
      "logits/rejected": -2.716820001602173,
      "logps/chosen": -186.0233154296875,
      "logps/rejected": -250.5030059814453,
      "loss": 0.6918,
      "rewards/accuracies": 0.5347222089767456,
      "rewards/chosen": -0.003977265674620867,
      "rewards/margins": 0.008249405771493912,
      "rewards/rejected": -0.012226670980453491,
      "step": 10
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931352528237397e-07,
      "logits/chosen": -2.656189441680908,
      "logits/rejected": -2.6212830543518066,
      "logps/chosen": -247.41494750976562,
      "logps/rejected": -254.1903533935547,
      "loss": 0.691,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.2717934250831604,
      "rewards/margins": 0.07151587307453156,
      "rewards/rejected": -0.34330934286117554,
      "step": 20
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.658920803689553e-07,
      "logits/chosen": -2.602332592010498,
      "logits/rejected": -2.5776302814483643,
      "logps/chosen": -248.0190887451172,
      "logps/rejected": -270.84722900390625,
      "loss": 0.6919,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.442482054233551,
      "rewards/margins": 0.07939548790454865,
      "rewards/rejected": -0.5218775272369385,
      "step": 30
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.201712553872657e-07,
      "logits/chosen": -2.5487563610076904,
      "logits/rejected": -2.5239639282226562,
      "logps/chosen": -244.3371124267578,
      "logps/rejected": -259.01324462890625,
      "loss": 0.6885,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.2263740599155426,
      "rewards/margins": 0.041686661541461945,
      "rewards/rejected": -0.26806074380874634,
      "step": 40
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.598859066780754e-07,
      "logits/chosen": -2.5481457710266113,
      "logits/rejected": -2.51816463470459,
      "logps/chosen": -246.34762573242188,
      "logps/rejected": -275.49151611328125,
      "loss": 0.6676,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.2860008180141449,
      "rewards/margins": 0.09634693711996078,
      "rewards/rejected": -0.3823477625846863,
      "step": 50
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9019570347986706e-07,
      "logits/chosen": -2.431131362915039,
      "logits/rejected": -2.448798656463623,
      "logps/chosen": -269.74493408203125,
      "logps/rejected": -303.2239990234375,
      "loss": 0.6669,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.5165982246398926,
      "rewards/margins": 0.1851460039615631,
      "rewards/rejected": -0.7017441987991333,
      "step": 60
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1706525253979534e-07,
      "logits/chosen": -2.425497055053711,
      "logits/rejected": -2.391167402267456,
      "logps/chosen": -265.3540344238281,
      "logps/rejected": -305.040283203125,
      "loss": 0.6602,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.5042034983634949,
      "rewards/margins": 0.13428547978401184,
      "rewards/rejected": -0.6384889483451843,
      "step": 70
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4675360263490295e-07,
      "logits/chosen": -2.4082672595977783,
      "logits/rejected": -2.3618528842926025,
      "logps/chosen": -234.425537109375,
      "logps/rejected": -287.005859375,
      "loss": 0.6639,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.44368410110473633,
      "rewards/margins": 0.15288691222667694,
      "rewards/rejected": -0.5965710282325745,
      "step": 80
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.527854855097224e-08,
      "logits/chosen": -2.4282336235046387,
      "logits/rejected": -2.3792128562927246,
      "logps/chosen": -274.50714111328125,
      "logps/rejected": -271.96221923828125,
      "loss": 0.6585,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.41591668128967285,
      "rewards/margins": 0.14339400827884674,
      "rewards/rejected": -0.5593106746673584,
      "step": 90
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.790158337517127e-08,
      "logits/chosen": -2.379441738128662,
      "logits/rejected": -2.3764429092407227,
      "logps/chosen": -241.2744598388672,
      "logps/rejected": -290.3218688964844,
      "loss": 0.6502,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.41449299454689026,
      "rewards/margins": 0.1260414719581604,
      "rewards/rejected": -0.5405344367027283,
      "step": 100
    },
    {
      "epoch": 0.92,
      "learning_rate": 8.677580722139671e-09,
      "logits/chosen": -2.385240077972412,
      "logits/rejected": -2.4066944122314453,
      "logps/chosen": -242.5824737548828,
      "logps/rejected": -280.25323486328125,
      "loss": 0.6612,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.47699427604675293,
      "rewards/margins": 0.030102651566267014,
      "rewards/rejected": -0.5070968866348267,
      "step": 110
    },
    {
      "epoch": 1.0,
      "step": 119,
      "total_flos": 0.0,
      "train_loss": 0.672575786334126,
      "train_runtime": 1976.2133,
      "train_samples_per_second": 7.733,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 10,
  "max_steps": 119,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}