|
{ |
|
"best_metric": 0.09009132534265518, |
|
"best_model_checkpoint": "/kaggle/working/xls-r-amharic/checkpoint-2500", |
|
"epoch": 14.969696969696969, |
|
"eval_steps": 500, |
|
"global_step": 3705, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.40404040404040403, |
|
"grad_norm": 3.749224901199341, |
|
"learning_rate": 9.730094466936572e-06, |
|
"loss": 1.1928, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8080808080808081, |
|
"grad_norm": 5.272401809692383, |
|
"learning_rate": 9.46288798920378e-06, |
|
"loss": 0.7006, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 2.211305618286133, |
|
"learning_rate": 9.192982456140351e-06, |
|
"loss": 0.3993, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6161616161616161, |
|
"grad_norm": 0.4073326289653778, |
|
"learning_rate": 8.923076923076925e-06, |
|
"loss": 0.2935, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0202020202020203, |
|
"grad_norm": 10.367269515991211, |
|
"learning_rate": 8.65587044534413e-06, |
|
"loss": 0.2847, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0202020202020203, |
|
"eval_accuracy": 0.9212121367454529, |
|
"eval_loss": 0.247890442609787, |
|
"eval_runtime": 43.8986, |
|
"eval_samples_per_second": 11.276, |
|
"eval_steps_per_second": 2.825, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4242424242424243, |
|
"grad_norm": 14.821730613708496, |
|
"learning_rate": 8.385964912280704e-06, |
|
"loss": 0.1788, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8282828282828283, |
|
"grad_norm": 1.658502221107483, |
|
"learning_rate": 8.116059379217275e-06, |
|
"loss": 0.1541, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.2323232323232323, |
|
"grad_norm": 7.924429416656494, |
|
"learning_rate": 7.846153846153847e-06, |
|
"loss": 0.1683, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 0.14956633746623993, |
|
"learning_rate": 7.576248313090419e-06, |
|
"loss": 0.1315, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.040404040404041, |
|
"grad_norm": 0.08813250064849854, |
|
"learning_rate": 7.306342780026991e-06, |
|
"loss": 0.1138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.040404040404041, |
|
"eval_accuracy": 0.9434343576431274, |
|
"eval_loss": 0.20633606612682343, |
|
"eval_runtime": 43.6554, |
|
"eval_samples_per_second": 11.339, |
|
"eval_steps_per_second": 2.84, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 2.7388453483581543, |
|
"learning_rate": 7.036437246963563e-06, |
|
"loss": 0.1113, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.848484848484849, |
|
"grad_norm": 1.112307071685791, |
|
"learning_rate": 6.766531713900135e-06, |
|
"loss": 0.1174, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.252525252525253, |
|
"grad_norm": 0.9322103261947632, |
|
"learning_rate": 6.496626180836708e-06, |
|
"loss": 0.1025, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.656565656565657, |
|
"grad_norm": 0.03639671951532364, |
|
"learning_rate": 6.22672064777328e-06, |
|
"loss": 0.0754, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0606060606060606, |
|
"grad_norm": 0.024523159489035606, |
|
"learning_rate": 5.956815114709852e-06, |
|
"loss": 0.0614, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0606060606060606, |
|
"eval_accuracy": 0.965656578540802, |
|
"eval_loss": 0.1415119469165802, |
|
"eval_runtime": 43.7636, |
|
"eval_samples_per_second": 11.311, |
|
"eval_steps_per_second": 2.833, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4646464646464645, |
|
"grad_norm": 0.04833903908729553, |
|
"learning_rate": 5.686909581646424e-06, |
|
"loss": 0.0296, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8686868686868685, |
|
"grad_norm": 0.018174033612012863, |
|
"learning_rate": 5.417004048582997e-06, |
|
"loss": 0.086, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"grad_norm": 0.2865201532840729, |
|
"learning_rate": 5.147098515519568e-06, |
|
"loss": 0.0671, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6767676767676765, |
|
"grad_norm": 0.0348142571747303, |
|
"learning_rate": 4.877192982456141e-06, |
|
"loss": 0.0512, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.080808080808081, |
|
"grad_norm": 0.2693362832069397, |
|
"learning_rate": 4.607287449392713e-06, |
|
"loss": 0.0349, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.080808080808081, |
|
"eval_accuracy": 0.973737359046936, |
|
"eval_loss": 0.13826610147953033, |
|
"eval_runtime": 43.9764, |
|
"eval_samples_per_second": 11.256, |
|
"eval_steps_per_second": 2.82, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.484848484848484, |
|
"grad_norm": 0.033031389117240906, |
|
"learning_rate": 4.337381916329285e-06, |
|
"loss": 0.0251, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 0.13385087251663208, |
|
"learning_rate": 4.067476383265857e-06, |
|
"loss": 0.0367, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.292929292929292, |
|
"grad_norm": 41.249717712402344, |
|
"learning_rate": 3.7975708502024296e-06, |
|
"loss": 0.0501, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.696969696969697, |
|
"grad_norm": 0.02456289902329445, |
|
"learning_rate": 3.527665317139002e-06, |
|
"loss": 0.0555, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.1010101010101, |
|
"grad_norm": 0.010708549991250038, |
|
"learning_rate": 3.2577597840755737e-06, |
|
"loss": 0.0143, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.1010101010101, |
|
"eval_accuracy": 0.9818181991577148, |
|
"eval_loss": 0.09009132534265518, |
|
"eval_runtime": 44.1262, |
|
"eval_samples_per_second": 11.218, |
|
"eval_steps_per_second": 2.81, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.505050505050505, |
|
"grad_norm": 0.012150867842137814, |
|
"learning_rate": 2.9905533063427807e-06, |
|
"loss": 0.0486, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.909090909090908, |
|
"grad_norm": 0.008297057822346687, |
|
"learning_rate": 2.7206477732793525e-06, |
|
"loss": 0.0349, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.313131313131313, |
|
"grad_norm": 0.013805734924972057, |
|
"learning_rate": 2.4507422402159244e-06, |
|
"loss": 0.0214, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.717171717171716, |
|
"grad_norm": 0.011365755461156368, |
|
"learning_rate": 2.180836707152497e-06, |
|
"loss": 0.0229, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.121212121212121, |
|
"grad_norm": 0.02992076426744461, |
|
"learning_rate": 1.910931174089069e-06, |
|
"loss": 0.0178, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.121212121212121, |
|
"eval_accuracy": 0.9777777791023254, |
|
"eval_loss": 0.1187622994184494, |
|
"eval_runtime": 43.9086, |
|
"eval_samples_per_second": 11.273, |
|
"eval_steps_per_second": 2.824, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.525252525252526, |
|
"grad_norm": 0.09338176250457764, |
|
"learning_rate": 1.6410256410256412e-06, |
|
"loss": 0.0252, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.929292929292929, |
|
"grad_norm": 0.27980300784111023, |
|
"learning_rate": 1.3738191632928477e-06, |
|
"loss": 0.0387, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 0.007048506755381823, |
|
"learning_rate": 1.1039136302294197e-06, |
|
"loss": 0.0093, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.737373737373737, |
|
"grad_norm": 0.00685643358156085, |
|
"learning_rate": 8.34008097165992e-07, |
|
"loss": 0.0222, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14.141414141414142, |
|
"grad_norm": 0.14381413161754608, |
|
"learning_rate": 5.641025641025642e-07, |
|
"loss": 0.0222, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.141414141414142, |
|
"eval_accuracy": 0.9777777791023254, |
|
"eval_loss": 0.12370182573795319, |
|
"eval_runtime": 44.0309, |
|
"eval_samples_per_second": 11.242, |
|
"eval_steps_per_second": 2.816, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.545454545454545, |
|
"grad_norm": 0.015089770779013634, |
|
"learning_rate": 2.941970310391363e-07, |
|
"loss": 0.0304, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.94949494949495, |
|
"grad_norm": 1.67782723903656, |
|
"learning_rate": 2.4291497975708507e-08, |
|
"loss": 0.0173, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 14.969696969696969, |
|
"step": 3705, |
|
"total_flos": 3.163398064220592e+18, |
|
"train_loss": 0.13106194541521884, |
|
"train_runtime": 5199.6798, |
|
"train_samples_per_second": 5.709, |
|
"train_steps_per_second": 0.713 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3705, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.163398064220592e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|