|
{ |
|
"best_metric": 1.1649552583694458, |
|
"best_model_checkpoint": "./interact_output_20231214_183743/checkpoint-912", |
|
"epoch": 3.991247264770241, |
|
"eval_steps": 500, |
|
"global_step": 912, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007984276816171436, |
|
"loss": 1.8072, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0007981855684763583, |
|
"loss": 1.9931, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0007974595227250475, |
|
"loss": 2.0219, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0007962504250201388, |
|
"loss": 1.9413, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000794559741936249, |
|
"loss": 1.8684, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0007923895241868038, |
|
"loss": 1.7867, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0007897424041366252, |
|
"loss": 1.8186, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0007866215926090057, |
|
"loss": 1.7737, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0007830308749911415, |
|
"loss": 1.6727, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0007789746066426482, |
|
"loss": 1.6249, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0007744577076127291, |
|
"loss": 1.7025, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0007694856566724036, |
|
"loss": 1.6132, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0007640644846690332, |
|
"loss": 1.63, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007582007672112082, |
|
"loss": 1.6888, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007519016166928652, |
|
"loss": 1.6102, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0007451746736663118, |
|
"loss": 1.5319, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.000738028097574621, |
|
"loss": 1.5352, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.000730470556854638, |
|
"loss": 1.4991, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0007225112184226035, |
|
"loss": 1.495, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0007141597365551446, |
|
"loss": 1.4296, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0007054262411791251, |
|
"loss": 1.4373, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0006963213255845531, |
|
"loss": 1.4589, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.5730081796646118, |
|
"eval_runtime": 181.0133, |
|
"eval_samples_per_second": 14.883, |
|
"eval_steps_per_second": 1.862, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0006868560335754548, |
|
"loss": 1.4361, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.000677041846074296, |
|
"loss": 1.3813, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000666890667196201, |
|
"loss": 1.3511, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0006564148098098617, |
|
"loss": 1.455, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0006456269806026464, |
|
"loss": 1.4276, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00063454026466803, |
|
"loss": 1.2906, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0006231681096340324, |
|
"loss": 1.3605, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0006115243093519255, |
|
"loss": 1.3765, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0005996229871649842, |
|
"loss": 1.3846, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0005874785787775835, |
|
"loss": 1.3476, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0005751058147454162, |
|
"loss": 1.3307, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0005625197026080706, |
|
"loss": 1.3481, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00054973550868564, |
|
"loss": 1.2677, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0005367687395614475, |
|
"loss": 1.2801, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0005236351232733387, |
|
"loss": 1.2434, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0005103505902363665, |
|
"loss": 1.2472, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0004969312539199984, |
|
"loss": 1.1805, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0004833933913032899, |
|
"loss": 1.2795, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0004697534231317295, |
|
"loss": 1.1841, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00045602789399970073, |
|
"loss": 1.2189, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0004422334522827224, |
|
"loss": 1.2124, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00042838682994380845, |
|
"loss": 1.1371, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00041450482223843874, |
|
"loss": 1.1254, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.318668007850647, |
|
"eval_runtime": 181.264, |
|
"eval_samples_per_second": 14.862, |
|
"eval_steps_per_second": 1.859, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0004006042673427602, |
|
"loss": 1.2324, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0003867020259297277, |
|
"loss": 1.2353, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00037281496071795675, |
|
"loss": 1.2029, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0003589599160180951, |
|
"loss": 1.1946, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0003451536973015218, |
|
"loss": 1.2571, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0003314130508161583, |
|
"loss": 1.1964, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0003177546432741117, |
|
"loss": 1.2171, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00030419504163579317, |
|
"loss": 1.1815, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00029075069301502925, |
|
"loss": 1.1589, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.000277437904729541, |
|
"loss": 1.1154, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0002642728245209895, |
|
"loss": 1.1195, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0002512714209685778, |
|
"loss": 1.1485, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00023844946411996905, |
|
"loss": 1.1151, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002258225063630134, |
|
"loss": 1.1342, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00021340586356148388, |
|
"loss": 1.1106, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0002012145964777057, |
|
"loss": 1.0693, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00018926349250461, |
|
"loss": 1.1118, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00017756704772937113, |
|
"loss": 1.097, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.00016613944935038317, |
|
"loss": 1.0072, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.000154994558468902, |
|
"loss": 1.0244, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.0001441458932762289, |
|
"loss": 1.0308, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00013360661265682426, |
|
"loss": 0.9882, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00012338950022724405, |
|
"loss": 0.9938, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.1857038736343384, |
|
"eval_runtime": 181.1024, |
|
"eval_samples_per_second": 14.876, |
|
"eval_steps_per_second": 1.861, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00011350694883025702, |
|
"loss": 1.0906, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00010397094550294988, |
|
"loss": 1.1792, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.4793056937056e-05, |
|
"loss": 1.0951, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.598441544914002e-05, |
|
"loss": 1.1168, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.755570547765905e-05, |
|
"loss": 1.0971, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 6.951715062327716e-05, |
|
"loss": 1.0359, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 6.187850124815228e-05, |
|
"loss": 1.077, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 5.4649022649238026e-05, |
|
"loss": 1.0996, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 4.783748381994562e-05, |
|
"loss": 1.0043, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.145214681379591e-05, |
|
"loss": 1.1422, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.550075672296503e-05, |
|
"loss": 1.1366, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.9990532283877747e-05, |
|
"loss": 1.0587, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.492815712124332e-05, |
|
"loss": 1.1301, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.0319771641155883e-05, |
|
"loss": 1.0567, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.617096558309071e-05, |
|
"loss": 1.1119, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.2486771239831942e-05, |
|
"loss": 1.1186, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 9.271657353555046e-06, |
|
"loss": 1.0765, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 6.529523695467422e-06, |
|
"loss": 1.0678, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.263696335582372e-06, |
|
"loss": 1.1022, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.476923608363819e-06, |
|
"loss": 1.0498, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.1713727791349433e-06, |
|
"loss": 1.0907, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.4862741529444126e-07, |
|
"loss": 1.0615, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 9.685465529235211e-09, |
|
"loss": 1.0461, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 1.1649552583694458, |
|
"eval_runtime": 181.1967, |
|
"eval_samples_per_second": 14.868, |
|
"eval_steps_per_second": 1.86, |
|
"step": 912 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 912, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 1.1665671520864666e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|