|
{ |
|
"best_metric": 0.47372516989707947, |
|
"best_model_checkpoint": "/data/jcanete/all_results/xnli/albeto_xlarge/epochs_2_bs_16_lr_5e-6/checkpoint-22000", |
|
"epoch": 2.0, |
|
"global_step": 49088, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9493766297262065e-06, |
|
"loss": 1.0212, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.898549543676663e-06, |
|
"loss": 0.8644, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.847722457627119e-06, |
|
"loss": 0.765, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.7967935136897e-06, |
|
"loss": 0.7077, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.7052208835341366, |
|
"eval_loss": 0.6958776712417603, |
|
"eval_runtime": 15.7507, |
|
"eval_samples_per_second": 158.088, |
|
"eval_steps_per_second": 9.904, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7458645697522824e-06, |
|
"loss": 0.6694, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.694935625814864e-06, |
|
"loss": 0.6511, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.644006681877445e-06, |
|
"loss": 0.6403, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.593179595827901e-06, |
|
"loss": 0.6062, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7610441767068273, |
|
"eval_loss": 0.6156352162361145, |
|
"eval_runtime": 15.8305, |
|
"eval_samples_per_second": 157.291, |
|
"eval_steps_per_second": 9.854, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.542250651890483e-06, |
|
"loss": 0.6191, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.491321707953064e-06, |
|
"loss": 0.6144, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.440392764015646e-06, |
|
"loss": 0.6089, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.389463820078227e-06, |
|
"loss": 0.584, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.7698795180722892, |
|
"eval_loss": 0.6093908548355103, |
|
"eval_runtime": 15.9514, |
|
"eval_samples_per_second": 156.099, |
|
"eval_steps_per_second": 9.78, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.338636734028683e-06, |
|
"loss": 0.583, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.287707790091265e-06, |
|
"loss": 0.5849, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.236778846153847e-06, |
|
"loss": 0.5819, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.185849902216428e-06, |
|
"loss": 0.5567, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.7718875502008032, |
|
"eval_loss": 0.5614191889762878, |
|
"eval_runtime": 15.8137, |
|
"eval_samples_per_second": 157.458, |
|
"eval_steps_per_second": 9.865, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.134920958279009e-06, |
|
"loss": 0.5554, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.083992014341591e-06, |
|
"loss": 0.5476, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.033063070404172e-06, |
|
"loss": 0.5531, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.982134126466754e-06, |
|
"loss": 0.5464, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.7847389558232932, |
|
"eval_loss": 0.5493358373641968, |
|
"eval_runtime": 15.7384, |
|
"eval_samples_per_second": 158.211, |
|
"eval_steps_per_second": 9.912, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.9312051825293356e-06, |
|
"loss": 0.5482, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.880378096479792e-06, |
|
"loss": 0.5451, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.8294491525423735e-06, |
|
"loss": 0.5402, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.7786220664928295e-06, |
|
"loss": 0.5393, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.7911646586345381, |
|
"eval_loss": 0.5225389003753662, |
|
"eval_runtime": 15.9954, |
|
"eval_samples_per_second": 155.67, |
|
"eval_steps_per_second": 9.753, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.7276931225554107e-06, |
|
"loss": 0.5344, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.6767641786179927e-06, |
|
"loss": 0.5495, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.6258352346805742e-06, |
|
"loss": 0.5362, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.5750081486310302e-06, |
|
"loss": 0.5308, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.7919678714859437, |
|
"eval_loss": 0.527259111404419, |
|
"eval_runtime": 16.0033, |
|
"eval_samples_per_second": 155.593, |
|
"eval_steps_per_second": 9.748, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.5240792046936118e-06, |
|
"loss": 0.5218, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4731502607561934e-06, |
|
"loss": 0.5421, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.4222213168187745e-06, |
|
"loss": 0.5173, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.371292372881356e-06, |
|
"loss": 0.5246, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.7975903614457831, |
|
"eval_loss": 0.506671130657196, |
|
"eval_runtime": 15.9337, |
|
"eval_samples_per_second": 156.272, |
|
"eval_steps_per_second": 9.791, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3204652868318125e-06, |
|
"loss": 0.5237, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.2695363428943936e-06, |
|
"loss": 0.5186, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.2186073989569756e-06, |
|
"loss": 0.5267, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.167678455019557e-06, |
|
"loss": 0.5075, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.7967871485943775, |
|
"eval_loss": 0.5081815719604492, |
|
"eval_runtime": 16.0036, |
|
"eval_samples_per_second": 155.59, |
|
"eval_steps_per_second": 9.748, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.1167495110821384e-06, |
|
"loss": 0.5029, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.06582056714472e-06, |
|
"loss": 0.5123, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.0148916232073015e-06, |
|
"loss": 0.5031, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.9639626792698826e-06, |
|
"loss": 0.5008, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.804417670682731, |
|
"eval_loss": 0.49554213881492615, |
|
"eval_runtime": 16.1075, |
|
"eval_samples_per_second": 154.587, |
|
"eval_steps_per_second": 9.685, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.9130337353324646e-06, |
|
"loss": 0.5054, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.862104791395046e-06, |
|
"loss": 0.4996, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.8111758474576274e-06, |
|
"loss": 0.4849, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.760246903520209e-06, |
|
"loss": 0.507, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.8152610441767069, |
|
"eval_loss": 0.47372516989707947, |
|
"eval_runtime": 16.3654, |
|
"eval_samples_per_second": 152.15, |
|
"eval_steps_per_second": 9.532, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.70931795958279e-06, |
|
"loss": 0.4995, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.6584908735332465e-06, |
|
"loss": 0.5007, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.607663787483703e-06, |
|
"loss": 0.4944, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5567348435462845e-06, |
|
"loss": 0.5053, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.8072289156626506, |
|
"eval_loss": 0.49480196833610535, |
|
"eval_runtime": 16.2328, |
|
"eval_samples_per_second": 153.393, |
|
"eval_steps_per_second": 9.61, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5058058996088656e-06, |
|
"loss": 0.4946, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4548769556714476e-06, |
|
"loss": 0.4133, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.4039480117340288e-06, |
|
"loss": 0.4103, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.3530190677966103e-06, |
|
"loss": 0.4063, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.8040160642570281, |
|
"eval_loss": 0.4989866316318512, |
|
"eval_runtime": 15.9626, |
|
"eval_samples_per_second": 155.99, |
|
"eval_steps_per_second": 9.773, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.3021919817470667e-06, |
|
"loss": 0.4123, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2512630378096483e-06, |
|
"loss": 0.3875, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.2003340938722295e-06, |
|
"loss": 0.4103, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.149405149934811e-06, |
|
"loss": 0.4061, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.797991967871486, |
|
"eval_loss": 0.5204265117645264, |
|
"eval_runtime": 16.59, |
|
"eval_samples_per_second": 150.09, |
|
"eval_steps_per_second": 9.403, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.0985780638852674e-06, |
|
"loss": 0.4178, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.047750977835724e-06, |
|
"loss": 0.4062, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.9968220338983054e-06, |
|
"loss": 0.4142, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.9458930899608866e-06, |
|
"loss": 0.4118, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.8120481927710843, |
|
"eval_loss": 0.5043097138404846, |
|
"eval_runtime": 15.9992, |
|
"eval_samples_per_second": 155.632, |
|
"eval_steps_per_second": 9.75, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.8949641460234683e-06, |
|
"loss": 0.4032, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8440352020860497e-06, |
|
"loss": 0.4004, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.793106258148631e-06, |
|
"loss": 0.3999, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7421773142112129e-06, |
|
"loss": 0.4057, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.8176706827309237, |
|
"eval_loss": 0.4870663285255432, |
|
"eval_runtime": 15.9746, |
|
"eval_samples_per_second": 155.872, |
|
"eval_steps_per_second": 9.765, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.691350228161669e-06, |
|
"loss": 0.4008, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6404212842242504e-06, |
|
"loss": 0.4112, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.589492340286832e-06, |
|
"loss": 0.4101, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5385633963494133e-06, |
|
"loss": 0.4108, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.8052208835341366, |
|
"eval_loss": 0.5033333897590637, |
|
"eval_runtime": 16.0841, |
|
"eval_samples_per_second": 154.811, |
|
"eval_steps_per_second": 9.699, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.487634452411995e-06, |
|
"loss": 0.3994, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4367055084745765e-06, |
|
"loss": 0.4046, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.3857765645371578e-06, |
|
"loss": 0.3958, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.334949478487614e-06, |
|
"loss": 0.3927, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.8068273092369478, |
|
"eval_loss": 0.4963078498840332, |
|
"eval_runtime": 15.7763, |
|
"eval_samples_per_second": 157.832, |
|
"eval_steps_per_second": 9.888, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.2840205345501958e-06, |
|
"loss": 0.3971, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.2330915906127772e-06, |
|
"loss": 0.3985, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.1821626466753585e-06, |
|
"loss": 0.3922, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.1312337027379401e-06, |
|
"loss": 0.4077, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.814859437751004, |
|
"eval_loss": 0.47995567321777344, |
|
"eval_runtime": 15.901, |
|
"eval_samples_per_second": 156.594, |
|
"eval_steps_per_second": 9.811, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.0803047588005217e-06, |
|
"loss": 0.4048, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.029375814863103e-06, |
|
"loss": 0.3926, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.784468709256846e-07, |
|
"loss": 0.4051, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.27517926988266e-07, |
|
"loss": 0.4029, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.810843373493976, |
|
"eval_loss": 0.4851303994655609, |
|
"eval_runtime": 15.8122, |
|
"eval_samples_per_second": 157.474, |
|
"eval_steps_per_second": 9.866, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.765889830508476e-07, |
|
"loss": 0.385, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.256600391134289e-07, |
|
"loss": 0.4012, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.747310951760105e-07, |
|
"loss": 0.3998, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.239040091264668e-07, |
|
"loss": 0.3926, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.8068273092369478, |
|
"eval_loss": 0.49413371086120605, |
|
"eval_runtime": 16.1684, |
|
"eval_samples_per_second": 154.004, |
|
"eval_steps_per_second": 9.648, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.729750651890484e-07, |
|
"loss": 0.396, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.220461212516297e-07, |
|
"loss": 0.3896, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5.711171773142113e-07, |
|
"loss": 0.3993, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.201882333767928e-07, |
|
"loss": 0.3895, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.8112449799196787, |
|
"eval_loss": 0.47773313522338867, |
|
"eval_runtime": 16.1399, |
|
"eval_samples_per_second": 154.276, |
|
"eval_steps_per_second": 9.665, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.694630052151239e-07, |
|
"loss": 0.3971, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.1853406127770535e-07, |
|
"loss": 0.3864, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.6770697522816176e-07, |
|
"loss": 0.3901, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.167780312907432e-07, |
|
"loss": 0.3806, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.8124497991967872, |
|
"eval_loss": 0.4864996075630188, |
|
"eval_runtime": 15.9489, |
|
"eval_samples_per_second": 156.123, |
|
"eval_steps_per_second": 9.781, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.6584908735332464e-07, |
|
"loss": 0.386, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.1492014341590616e-07, |
|
"loss": 0.3804, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.6409305736636246e-07, |
|
"loss": 0.3886, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.1316411342894395e-07, |
|
"loss": 0.3771, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.8156626506024096, |
|
"eval_loss": 0.4936372935771942, |
|
"eval_runtime": 16.2104, |
|
"eval_samples_per_second": 153.605, |
|
"eval_steps_per_second": 9.623, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.223516949152543e-08, |
|
"loss": 0.4011, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.130622555410691e-08, |
|
"loss": 0.394, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 49088, |
|
"total_flos": 2.2556354760831744e+16, |
|
"train_loss": 0.4839201732751133, |
|
"train_runtime": 15881.4239, |
|
"train_samples_per_second": 49.454, |
|
"train_steps_per_second": 3.091 |
|
} |
|
], |
|
"max_steps": 49088, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.2556354760831744e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|