|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.795063575168287, |
|
"eval_steps": 100, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05983545250560957, |
|
"grad_norm": 7.40893212326487, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 0.6407, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11967090501121914, |
|
"grad_norm": 2.443879636951024, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 0.3887, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17950635751682872, |
|
"grad_norm": 2.182405695242156, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.2616, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2393418100224383, |
|
"grad_norm": 2.01489573168189, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 0.2571, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2991772625280479, |
|
"grad_norm": 1.979013183070111, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.2477, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.35901271503365745, |
|
"grad_norm": 1.549355728093093, |
|
"learning_rate": 9.990133642141359e-06, |
|
"loss": 0.2176, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.418848167539267, |
|
"grad_norm": 1.4426481204872057, |
|
"learning_rate": 9.95607770125771e-06, |
|
"loss": 0.2273, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4786836200448766, |
|
"grad_norm": 1.7789832633152836, |
|
"learning_rate": 9.89787624799672e-06, |
|
"loss": 0.2125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5385190725504861, |
|
"grad_norm": 1.529318217095282, |
|
"learning_rate": 9.815812833988292e-06, |
|
"loss": 0.2229, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5983545250560958, |
|
"grad_norm": 1.6259265112226373, |
|
"learning_rate": 9.710287263936485e-06, |
|
"loss": 0.2062, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5983545250560958, |
|
"eval_loss": 0.2123425155878067, |
|
"eval_runtime": 33.4729, |
|
"eval_samples_per_second": 17.776, |
|
"eval_steps_per_second": 8.903, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6581899775617053, |
|
"grad_norm": 1.6245243576341002, |
|
"learning_rate": 9.581813647811199e-06, |
|
"loss": 0.2105, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7180254300673149, |
|
"grad_norm": 1.731561075586601, |
|
"learning_rate": 9.431017896156074e-06, |
|
"loss": 0.2048, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7778608825729244, |
|
"grad_norm": 1.7874480467541498, |
|
"learning_rate": 9.25863467071524e-06, |
|
"loss": 0.2113, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.837696335078534, |
|
"grad_norm": 1.3708463663991368, |
|
"learning_rate": 9.065503805235139e-06, |
|
"loss": 0.1988, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8975317875841436, |
|
"grad_norm": 1.3567660521800535, |
|
"learning_rate": 8.852566213878947e-06, |
|
"loss": 0.2038, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9573672400897532, |
|
"grad_norm": 1.8281708498422444, |
|
"learning_rate": 8.620859307187339e-06, |
|
"loss": 0.2196, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0172026925953628, |
|
"grad_norm": 1.2318054900550177, |
|
"learning_rate": 8.371511937918616e-06, |
|
"loss": 0.1762, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0770381451009723, |
|
"grad_norm": 1.568321912319435, |
|
"learning_rate": 8.105738901391553e-06, |
|
"loss": 0.1288, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.136873597606582, |
|
"grad_norm": 1.3819346363939895, |
|
"learning_rate": 7.82483501712469e-06, |
|
"loss": 0.1214, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1967090501121915, |
|
"grad_norm": 1.2680685647450163, |
|
"learning_rate": 7.530168820605819e-06, |
|
"loss": 0.1256, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1967090501121915, |
|
"eval_loss": 0.20358169078826904, |
|
"eval_runtime": 32.7594, |
|
"eval_samples_per_second": 18.163, |
|
"eval_steps_per_second": 9.097, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.256544502617801, |
|
"grad_norm": 1.2942802177914767, |
|
"learning_rate": 7.223175895924638e-06, |
|
"loss": 0.1241, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.3163799551234106, |
|
"grad_norm": 1.4364370392498633, |
|
"learning_rate": 6.905351881751372e-06, |
|
"loss": 0.1254, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.37621540762902, |
|
"grad_norm": 1.330811194933078, |
|
"learning_rate": 6.578245184735513e-06, |
|
"loss": 0.1229, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4360508601346298, |
|
"grad_norm": 1.304831888309303, |
|
"learning_rate": 6.243449435824276e-06, |
|
"loss": 0.1147, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4958863126402393, |
|
"grad_norm": 1.2398683599838292, |
|
"learning_rate": 5.902595726252801e-06, |
|
"loss": 0.1345, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.555721765145849, |
|
"grad_norm": 1.3240317320353998, |
|
"learning_rate": 5.557344661031628e-06, |
|
"loss": 0.1236, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.6155572176514585, |
|
"grad_norm": 1.518581095835922, |
|
"learning_rate": 5.209378268645998e-06, |
|
"loss": 0.1218, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.675392670157068, |
|
"grad_norm": 1.5653129689570715, |
|
"learning_rate": 4.860391806382157e-06, |
|
"loss": 0.1246, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7352281226626776, |
|
"grad_norm": 1.4836280079781416, |
|
"learning_rate": 4.512085501204254e-06, |
|
"loss": 0.1156, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.795063575168287, |
|
"grad_norm": 1.4998045733125407, |
|
"learning_rate": 4.166156266419489e-06, |
|
"loss": 0.1296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.795063575168287, |
|
"eval_loss": 0.19370371103286743, |
|
"eval_runtime": 33.117, |
|
"eval_samples_per_second": 17.967, |
|
"eval_steps_per_second": 8.998, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 501, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5914416119808.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|