|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.968063872255488, |
|
"global_step": 32000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.916833000665336e-05, |
|
"loss": 7.7087, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.833666001330672e-05, |
|
"loss": 7.4465, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.750499001996009e-05, |
|
"loss": 7.1976, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.667332002661345e-05, |
|
"loss": 6.9445, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.58416500332668e-05, |
|
"loss": 6.6812, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.500998003992016e-05, |
|
"loss": 6.2449, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.417831004657353e-05, |
|
"loss": 5.6326, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.334664005322689e-05, |
|
"loss": 4.9326, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 9.251497005988024e-05, |
|
"loss": 4.4854, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.16833000665336e-05, |
|
"loss": 4.1808, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.085163007318697e-05, |
|
"loss": 3.9169, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.001996007984033e-05, |
|
"loss": 3.7161, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 8.918829008649369e-05, |
|
"loss": 3.5102, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.835662009314704e-05, |
|
"loss": 3.3782, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 8.752495009980041e-05, |
|
"loss": 3.246, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.669328010645377e-05, |
|
"loss": 3.1532, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 8.586161011310713e-05, |
|
"loss": 3.0313, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 8.502994011976048e-05, |
|
"loss": 2.9563, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 8.419827012641384e-05, |
|
"loss": 2.8741, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 8.336660013306721e-05, |
|
"loss": 2.816, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 8.253493013972057e-05, |
|
"loss": 2.7435, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 8.170326014637393e-05, |
|
"loss": 2.6801, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 8.087159015302728e-05, |
|
"loss": 2.6325, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 8.003992015968065e-05, |
|
"loss": 2.5931, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 7.9208250166334e-05, |
|
"loss": 2.525, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 7.837658017298735e-05, |
|
"loss": 2.4914, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 7.754491017964072e-05, |
|
"loss": 2.4559, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 7.671324018629408e-05, |
|
"loss": 2.4176, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 7.588157019294744e-05, |
|
"loss": 2.3771, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 7.50499001996008e-05, |
|
"loss": 2.3386, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 7.421823020625415e-05, |
|
"loss": 2.3137, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 7.338656021290752e-05, |
|
"loss": 2.2792, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 7.255489021956088e-05, |
|
"loss": 2.235, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 7.172322022621424e-05, |
|
"loss": 2.2126, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 7.089155023286759e-05, |
|
"loss": 2.1985, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 7.005988023952096e-05, |
|
"loss": 2.1803, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 6.922821024617432e-05, |
|
"loss": 2.1363, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 6.839654025282768e-05, |
|
"loss": 2.1104, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 6.756487025948103e-05, |
|
"loss": 2.0954, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 6.67332002661344e-05, |
|
"loss": 2.0818, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 6.590153027278776e-05, |
|
"loss": 2.0576, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 6.506986027944112e-05, |
|
"loss": 2.0304, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 6.423819028609448e-05, |
|
"loss": 2.0079, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 6.340652029274785e-05, |
|
"loss": 2.0089, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 6.25748502994012e-05, |
|
"loss": 1.971, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 6.174318030605456e-05, |
|
"loss": 1.9458, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 6.091151031270792e-05, |
|
"loss": 1.9479, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 6.007984031936128e-05, |
|
"loss": 1.9332, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 5.924817032601464e-05, |
|
"loss": 1.9048, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 5.8416500332668e-05, |
|
"loss": 1.8943, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 5.758483033932136e-05, |
|
"loss": 1.8837, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 5.675316034597472e-05, |
|
"loss": 1.8606, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 5.592149035262808e-05, |
|
"loss": 1.8289, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 5.508982035928144e-05, |
|
"loss": 1.8209, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 5.42581503659348e-05, |
|
"loss": 1.8284, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 5.342648037258816e-05, |
|
"loss": 1.814, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 5.259481037924152e-05, |
|
"loss": 1.7909, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 5.1763140385894884e-05, |
|
"loss": 1.7712, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 5.093147039254824e-05, |
|
"loss": 1.7695, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 5.0099800399201604e-05, |
|
"loss": 1.7594, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 4.9268130405854955e-05, |
|
"loss": 1.7413, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 4.843646041250832e-05, |
|
"loss": 1.7254, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 4.7604790419161675e-05, |
|
"loss": 1.7287, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 4.677312042581504e-05, |
|
"loss": 1.7078, |
|
"step": 32000 |
|
} |
|
], |
|
"max_steps": 60120, |
|
"num_train_epochs": 30, |
|
"total_flos": 3.3697979301888e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|