0.001_idpo_4iters_iter_2 / trainer_state.json
ShenaoZhang's picture
Model save
7d4ee2d verified
raw
history blame
No virus
6.37 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.99581589958159,
"eval_steps": 500,
"global_step": 119,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -2.716510057449341,
"logits/rejected": -2.7105798721313477,
"logps/chosen": -175.55441284179688,
"logps/rejected": -222.94068908691406,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -2.7519237995147705,
"logits/rejected": -2.716820001602173,
"logps/chosen": -186.0233154296875,
"logps/rejected": -250.5030059814453,
"loss": 0.6918,
"rewards/accuracies": 0.5347222089767456,
"rewards/chosen": -0.003977265674620867,
"rewards/margins": 0.008249405771493912,
"rewards/rejected": -0.012226670980453491,
"step": 10
},
{
"epoch": 0.17,
"learning_rate": 4.931352528237397e-07,
"logits/chosen": -2.656189441680908,
"logits/rejected": -2.6212830543518066,
"logps/chosen": -247.41494750976562,
"logps/rejected": -254.1903533935547,
"loss": 0.691,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.2717934250831604,
"rewards/margins": 0.07151587307453156,
"rewards/rejected": -0.34330934286117554,
"step": 20
},
{
"epoch": 0.25,
"learning_rate": 4.658920803689553e-07,
"logits/chosen": -2.602332592010498,
"logits/rejected": -2.5776302814483643,
"logps/chosen": -248.0190887451172,
"logps/rejected": -270.84722900390625,
"loss": 0.6919,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.442482054233551,
"rewards/margins": 0.07939548790454865,
"rewards/rejected": -0.5218775272369385,
"step": 30
},
{
"epoch": 0.33,
"learning_rate": 4.201712553872657e-07,
"logits/chosen": -2.5487563610076904,
"logits/rejected": -2.5239639282226562,
"logps/chosen": -244.3371124267578,
"logps/rejected": -259.01324462890625,
"loss": 0.6885,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.2263740599155426,
"rewards/margins": 0.041686661541461945,
"rewards/rejected": -0.26806074380874634,
"step": 40
},
{
"epoch": 0.42,
"learning_rate": 3.598859066780754e-07,
"logits/chosen": -2.5481457710266113,
"logits/rejected": -2.51816463470459,
"logps/chosen": -246.34762573242188,
"logps/rejected": -275.49151611328125,
"loss": 0.6676,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.2860008180141449,
"rewards/margins": 0.09634693711996078,
"rewards/rejected": -0.3823477625846863,
"step": 50
},
{
"epoch": 0.5,
"learning_rate": 2.9019570347986706e-07,
"logits/chosen": -2.431131362915039,
"logits/rejected": -2.448798656463623,
"logps/chosen": -269.74493408203125,
"logps/rejected": -303.2239990234375,
"loss": 0.6669,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.5165982246398926,
"rewards/margins": 0.1851460039615631,
"rewards/rejected": -0.7017441987991333,
"step": 60
},
{
"epoch": 0.59,
"learning_rate": 2.1706525253979534e-07,
"logits/chosen": -2.425497055053711,
"logits/rejected": -2.391167402267456,
"logps/chosen": -265.3540344238281,
"logps/rejected": -305.040283203125,
"loss": 0.6602,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.5042034983634949,
"rewards/margins": 0.13428547978401184,
"rewards/rejected": -0.6384889483451843,
"step": 70
},
{
"epoch": 0.67,
"learning_rate": 1.4675360263490295e-07,
"logits/chosen": -2.4082672595977783,
"logits/rejected": -2.3618528842926025,
"logps/chosen": -234.425537109375,
"logps/rejected": -287.005859375,
"loss": 0.6639,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.44368410110473633,
"rewards/margins": 0.15288691222667694,
"rewards/rejected": -0.5965710282325745,
"step": 80
},
{
"epoch": 0.75,
"learning_rate": 8.527854855097224e-08,
"logits/chosen": -2.4282336235046387,
"logits/rejected": -2.3792128562927246,
"logps/chosen": -274.50714111328125,
"logps/rejected": -271.96221923828125,
"loss": 0.6585,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.41591668128967285,
"rewards/margins": 0.14339400827884674,
"rewards/rejected": -0.5593106746673584,
"step": 90
},
{
"epoch": 0.84,
"learning_rate": 3.790158337517127e-08,
"logits/chosen": -2.379441738128662,
"logits/rejected": -2.3764429092407227,
"logps/chosen": -241.2744598388672,
"logps/rejected": -290.3218688964844,
"loss": 0.6502,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.41449299454689026,
"rewards/margins": 0.1260414719581604,
"rewards/rejected": -0.5405344367027283,
"step": 100
},
{
"epoch": 0.92,
"learning_rate": 8.677580722139671e-09,
"logits/chosen": -2.385240077972412,
"logits/rejected": -2.4066944122314453,
"logps/chosen": -242.5824737548828,
"logps/rejected": -280.25323486328125,
"loss": 0.6612,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.47699427604675293,
"rewards/margins": 0.030102651566267014,
"rewards/rejected": -0.5070968866348267,
"step": 110
},
{
"epoch": 1.0,
"step": 119,
"total_flos": 0.0,
"train_loss": 0.672575786334126,
"train_runtime": 1976.2133,
"train_samples_per_second": 7.733,
"train_steps_per_second": 0.06
}
],
"logging_steps": 10,
"max_steps": 119,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}