|
{ |
|
"best_metric": 0.35073795914649963, |
|
"best_model_checkpoint": "../../saves/LLaMA3-70B-qlora-bnb/lora/sft/A61K-15950_2/checkpoint-200", |
|
"epoch": 0.9975308641975309, |
|
"eval_steps": 100, |
|
"global_step": 202, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04938271604938271, |
|
"grad_norm": 46.31583786010742, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 12.5496, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09876543209876543, |
|
"grad_norm": 23.47727394104004, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 11.9318, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14814814814814814, |
|
"grad_norm": 25.78252410888672, |
|
"learning_rate": 9.999246866958692e-05, |
|
"loss": 7.8505, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19753086419753085, |
|
"grad_norm": 13.642712593078613, |
|
"learning_rate": 9.909145108993794e-05, |
|
"loss": 1.2163, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24691358024691357, |
|
"grad_norm": 10.011795997619629, |
|
"learning_rate": 9.67152097716334e-05, |
|
"loss": 0.4577, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 10.739720344543457, |
|
"learning_rate": 9.29351520070574e-05, |
|
"loss": 0.3768, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.345679012345679, |
|
"grad_norm": 11.204980850219727, |
|
"learning_rate": 8.786487050581583e-05, |
|
"loss": 0.4593, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3950617283950617, |
|
"grad_norm": 4.646059513092041, |
|
"learning_rate": 8.165672987449962e-05, |
|
"loss": 0.4234, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 5.678186893463135, |
|
"learning_rate": 7.449728798069864e-05, |
|
"loss": 0.3857, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.49382716049382713, |
|
"grad_norm": 49.75868225097656, |
|
"learning_rate": 6.66016897916682e-05, |
|
"loss": 0.3663, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.49382716049382713, |
|
"eval_loss": 0.35489073395729065, |
|
"eval_runtime": 1317.5223, |
|
"eval_samples_per_second": 0.137, |
|
"eval_steps_per_second": 0.137, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5432098765432098, |
|
"grad_norm": 2.499194860458374, |
|
"learning_rate": 5.820720215572375e-05, |
|
"loss": 0.37, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 3.846980571746826, |
|
"learning_rate": 4.956608380955877e-05, |
|
"loss": 0.3788, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6419753086419753, |
|
"grad_norm": 2.995356321334839, |
|
"learning_rate": 4.093800487148857e-05, |
|
"loss": 0.401, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.691358024691358, |
|
"grad_norm": 4.932299613952637, |
|
"learning_rate": 3.258224361880657e-05, |
|
"loss": 0.3944, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 4.609350681304932, |
|
"learning_rate": 2.474989504016798e-05, |
|
"loss": 0.3521, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7901234567901234, |
|
"grad_norm": 3.664435625076294, |
|
"learning_rate": 1.7676325300069825e-05, |
|
"loss": 0.3425, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8395061728395061, |
|
"grad_norm": 31.691667556762695, |
|
"learning_rate": 1.1574098862709992e-05, |
|
"loss": 0.3548, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 2.024195909500122, |
|
"learning_rate": 6.626590818846162e-06, |
|
"loss": 0.3404, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9382716049382716, |
|
"grad_norm": 2.383239507675171, |
|
"learning_rate": 2.9824763685681766e-06, |
|
"loss": 0.3374, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 2.007232189178467, |
|
"learning_rate": 7.512630539036502e-07, |
|
"loss": 0.3383, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"eval_loss": 0.35073795914649963, |
|
"eval_runtime": 1316.6761, |
|
"eval_samples_per_second": 0.137, |
|
"eval_steps_per_second": 0.137, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9975308641975309, |
|
"step": 202, |
|
"total_flos": 8.350412130662744e+18, |
|
"train_loss": 1.9651573648547183, |
|
"train_runtime": 27220.4722, |
|
"train_samples_per_second": 0.06, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 202, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 8.350412130662744e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|