DrNicefellow's picture
Upload 10 files
96adb53 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.008150126138483479,
"eval_steps": 100000000,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 8.15012613848348e-06,
"grad_norm": 43.29102325439453,
"learning_rate": 1.0000000000000001e-07,
"loss": 81.0533,
"step": 1
},
{
"epoch": 0.0002445037841545044,
"grad_norm": 42.03493881225586,
"learning_rate": 3e-06,
"loss": 70.3325,
"step": 30
},
{
"epoch": 0.0004890075683090088,
"grad_norm": 10.497812271118164,
"learning_rate": 6e-06,
"loss": 23.8844,
"step": 60
},
{
"epoch": 0.0007335113524635132,
"grad_norm": 6.692718029022217,
"learning_rate": 9e-06,
"loss": 11.7198,
"step": 90
},
{
"epoch": 0.0009780151366180175,
"grad_norm": 9.11446475982666,
"learning_rate": 1.2e-05,
"loss": 10.3199,
"step": 120
},
{
"epoch": 0.001222518920772522,
"grad_norm": 16.92243766784668,
"learning_rate": 1.5e-05,
"loss": 9.5338,
"step": 150
},
{
"epoch": 0.0014670227049270264,
"grad_norm": 12.929216384887695,
"learning_rate": 1.8e-05,
"loss": 8.9114,
"step": 180
},
{
"epoch": 0.0017115264890815308,
"grad_norm": 8.830116271972656,
"learning_rate": 2.1e-05,
"loss": 8.3867,
"step": 210
},
{
"epoch": 0.001956030273236035,
"grad_norm": 7.348124980926514,
"learning_rate": 2.4e-05,
"loss": 8.0113,
"step": 240
},
{
"epoch": 0.0022005340573905395,
"grad_norm": 12.751787185668945,
"learning_rate": 2.7000000000000002e-05,
"loss": 7.6093,
"step": 270
},
{
"epoch": 0.002445037841545044,
"grad_norm": 15.10444164276123,
"learning_rate": 3e-05,
"loss": 7.3484,
"step": 300
},
{
"epoch": 0.0026895416256995483,
"grad_norm": 6.653383731842041,
"learning_rate": 3.3e-05,
"loss": 7.2105,
"step": 330
},
{
"epoch": 0.0029340454098540527,
"grad_norm": 6.986039161682129,
"learning_rate": 3.6e-05,
"loss": 7.0228,
"step": 360
},
{
"epoch": 0.003178549194008557,
"grad_norm": 6.230088710784912,
"learning_rate": 3.9000000000000006e-05,
"loss": 6.8948,
"step": 390
},
{
"epoch": 0.0034230529781630616,
"grad_norm": 8.170981407165527,
"learning_rate": 4.2e-05,
"loss": 6.6965,
"step": 420
},
{
"epoch": 0.0036675567623175656,
"grad_norm": 5.6345930099487305,
"learning_rate": 4.5e-05,
"loss": 6.5988,
"step": 450
},
{
"epoch": 0.00391206054647207,
"grad_norm": 5.156513214111328,
"learning_rate": 4.8e-05,
"loss": 6.4795,
"step": 480
},
{
"epoch": 0.0041565643306265745,
"grad_norm": 5.4964189529418945,
"learning_rate": 4.999999990869806e-05,
"loss": 6.333,
"step": 510
},
{
"epoch": 0.004401068114781079,
"grad_norm": 3.67378306388855,
"learning_rate": 4.999999853916893e-05,
"loss": 6.2208,
"step": 540
},
{
"epoch": 0.004645571898935583,
"grad_norm": 8.507222175598145,
"learning_rate": 4.9999995526204936e-05,
"loss": 6.1097,
"step": 570
},
{
"epoch": 0.004890075683090088,
"grad_norm": 3.756618022918701,
"learning_rate": 4.999999086980628e-05,
"loss": 5.9886,
"step": 600
},
{
"epoch": 0.005134579467244592,
"grad_norm": 3.8149781227111816,
"learning_rate": 4.999998456997326e-05,
"loss": 5.8779,
"step": 630
},
{
"epoch": 0.005379083251399097,
"grad_norm": 3.840543270111084,
"learning_rate": 4.999997662670628e-05,
"loss": 5.805,
"step": 660
},
{
"epoch": 0.005623587035553601,
"grad_norm": 3.4208931922912598,
"learning_rate": 4.999996704000589e-05,
"loss": 5.6992,
"step": 690
},
{
"epoch": 0.0058680908197081055,
"grad_norm": 3.2975683212280273,
"learning_rate": 4.99999558098727e-05,
"loss": 5.6531,
"step": 720
},
{
"epoch": 0.00611259460386261,
"grad_norm": 4.05631160736084,
"learning_rate": 4.9999942936307445e-05,
"loss": 5.554,
"step": 750
},
{
"epoch": 0.006357098388017114,
"grad_norm": 3.1539864540100098,
"learning_rate": 4.9999928419310994e-05,
"loss": 5.4931,
"step": 780
},
{
"epoch": 0.006601602172171619,
"grad_norm": 4.811732292175293,
"learning_rate": 4.999991225888427e-05,
"loss": 5.4204,
"step": 810
},
{
"epoch": 0.006846105956326123,
"grad_norm": 2.9593210220336914,
"learning_rate": 4.999989445502837e-05,
"loss": 5.3687,
"step": 840
},
{
"epoch": 0.007090609740480627,
"grad_norm": 3.942239284515381,
"learning_rate": 4.9999875007744436e-05,
"loss": 5.3238,
"step": 870
},
{
"epoch": 0.007335113524635131,
"grad_norm": 2.29752254486084,
"learning_rate": 4.9999853917033756e-05,
"loss": 5.2423,
"step": 900
},
{
"epoch": 0.007579617308789636,
"grad_norm": 2.243770122528076,
"learning_rate": 4.999983118289773e-05,
"loss": 5.2384,
"step": 930
},
{
"epoch": 0.00782412109294414,
"grad_norm": 2.5572686195373535,
"learning_rate": 4.999980680533782e-05,
"loss": 5.1761,
"step": 960
},
{
"epoch": 0.008068624877098645,
"grad_norm": 2.4739913940429688,
"learning_rate": 4.999978078435567e-05,
"loss": 5.1215,
"step": 990
}
],
"logging_steps": 30,
"max_steps": 368091,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 9.730730754048e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}