|
{ |
|
"best_metric": 0.28241145610809326, |
|
"best_model_checkpoint": "roberta-base-Address-classifier/checkpoint-1356", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1356, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.048672566371681415, |
|
"grad_norm": 11.64233112335205, |
|
"learning_rate": 8.088235294117648e-06, |
|
"loss": 2.0159, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09734513274336283, |
|
"grad_norm": 9.701587677001953, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 1.8444, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.14601769911504425, |
|
"grad_norm": 17.245328903198242, |
|
"learning_rate": 2.4264705882352942e-05, |
|
"loss": 1.5743, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.19469026548672566, |
|
"grad_norm": 15.90720272064209, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 1.4247, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.24336283185840707, |
|
"grad_norm": 46.80242156982422, |
|
"learning_rate": 4.044117647058824e-05, |
|
"loss": 1.3278, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2920353982300885, |
|
"grad_norm": 18.616405487060547, |
|
"learning_rate": 4.8529411764705885e-05, |
|
"loss": 1.297, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3407079646017699, |
|
"grad_norm": 13.270706176757812, |
|
"learning_rate": 4.926229508196721e-05, |
|
"loss": 0.8346, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3893805309734513, |
|
"grad_norm": 15.923650741577148, |
|
"learning_rate": 4.836065573770492e-05, |
|
"loss": 0.8817, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.43805309734513276, |
|
"grad_norm": 10.419814109802246, |
|
"learning_rate": 4.745901639344262e-05, |
|
"loss": 0.8942, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.48672566371681414, |
|
"grad_norm": 73.36026763916016, |
|
"learning_rate": 4.655737704918033e-05, |
|
"loss": 0.8241, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5353982300884956, |
|
"grad_norm": 0.5245129466056824, |
|
"learning_rate": 4.5655737704918036e-05, |
|
"loss": 0.8222, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.584070796460177, |
|
"grad_norm": 2.809516429901123, |
|
"learning_rate": 4.475409836065574e-05, |
|
"loss": 0.5647, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6327433628318584, |
|
"grad_norm": 0.8721010088920593, |
|
"learning_rate": 4.3852459016393444e-05, |
|
"loss": 1.0694, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6814159292035398, |
|
"grad_norm": 23.75225067138672, |
|
"learning_rate": 4.295081967213115e-05, |
|
"loss": 0.9127, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7300884955752213, |
|
"grad_norm": 0.05216699093580246, |
|
"learning_rate": 4.204918032786885e-05, |
|
"loss": 0.5421, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7787610619469026, |
|
"grad_norm": 158.07220458984375, |
|
"learning_rate": 4.1147540983606556e-05, |
|
"loss": 0.6714, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.827433628318584, |
|
"grad_norm": 0.044040240347385406, |
|
"learning_rate": 4.0245901639344266e-05, |
|
"loss": 0.4678, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8761061946902655, |
|
"grad_norm": 14.472332954406738, |
|
"learning_rate": 3.934426229508197e-05, |
|
"loss": 0.7397, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9247787610619469, |
|
"grad_norm": 0.1380554586648941, |
|
"learning_rate": 3.8442622950819674e-05, |
|
"loss": 0.6216, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9734513274336283, |
|
"grad_norm": 2.5347917079925537, |
|
"learning_rate": 3.754098360655738e-05, |
|
"loss": 0.618, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8502202643171806, |
|
"eval_f1_macro": 0.6415460023258589, |
|
"eval_f1_micro": 0.8502202643171806, |
|
"eval_f1_weighted": 0.7985454278732038, |
|
"eval_loss": 0.39620670676231384, |
|
"eval_precision_macro": 0.6037581699346405, |
|
"eval_precision_micro": 0.8502202643171806, |
|
"eval_precision_weighted": 0.7685605079036019, |
|
"eval_recall_macro": 0.7080745341614907, |
|
"eval_recall_micro": 0.8502202643171806, |
|
"eval_recall_weighted": 0.8502202643171806, |
|
"eval_runtime": 39.9004, |
|
"eval_samples_per_second": 5.689, |
|
"eval_steps_per_second": 1.429, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.0221238938053097, |
|
"grad_norm": 60.38380432128906, |
|
"learning_rate": 3.663934426229508e-05, |
|
"loss": 0.6679, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.0707964601769913, |
|
"grad_norm": 4.803969383239746, |
|
"learning_rate": 3.5737704918032786e-05, |
|
"loss": 0.3501, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1194690265486726, |
|
"grad_norm": 32.821571350097656, |
|
"learning_rate": 3.483606557377049e-05, |
|
"loss": 0.2716, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.168141592920354, |
|
"grad_norm": 17.706575393676758, |
|
"learning_rate": 3.39344262295082e-05, |
|
"loss": 0.507, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.2168141592920354, |
|
"grad_norm": 0.13019758462905884, |
|
"learning_rate": 3.3032786885245905e-05, |
|
"loss": 0.7063, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2654867256637168, |
|
"grad_norm": 28.202177047729492, |
|
"learning_rate": 3.213114754098361e-05, |
|
"loss": 0.7669, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.3141592920353982, |
|
"grad_norm": 0.33747950196266174, |
|
"learning_rate": 3.122950819672131e-05, |
|
"loss": 0.3914, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.3628318584070795, |
|
"grad_norm": 0.025730812922120094, |
|
"learning_rate": 3.0327868852459017e-05, |
|
"loss": 0.2898, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.411504424778761, |
|
"grad_norm": 0.22744347155094147, |
|
"learning_rate": 2.9426229508196725e-05, |
|
"loss": 0.4201, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.4601769911504425, |
|
"grad_norm": 0.045706842094659805, |
|
"learning_rate": 2.852459016393443e-05, |
|
"loss": 0.0077, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.508849557522124, |
|
"grad_norm": 0.16193030774593353, |
|
"learning_rate": 2.7622950819672133e-05, |
|
"loss": 0.3191, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.5575221238938053, |
|
"grad_norm": 0.03732098266482353, |
|
"learning_rate": 2.6721311475409837e-05, |
|
"loss": 0.9339, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.606194690265487, |
|
"grad_norm": 1.1235909461975098, |
|
"learning_rate": 2.5819672131147544e-05, |
|
"loss": 0.3131, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.6548672566371683, |
|
"grad_norm": 0.043644171208143234, |
|
"learning_rate": 2.4918032786885248e-05, |
|
"loss": 0.2802, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.7035398230088497, |
|
"grad_norm": 0.18655426800251007, |
|
"learning_rate": 2.4016393442622952e-05, |
|
"loss": 0.518, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.752212389380531, |
|
"grad_norm": 128.0469207763672, |
|
"learning_rate": 2.311475409836066e-05, |
|
"loss": 0.3009, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.8008849557522124, |
|
"grad_norm": 0.08213580399751663, |
|
"learning_rate": 2.2213114754098363e-05, |
|
"loss": 0.4224, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.8495575221238938, |
|
"grad_norm": 0.04984794929623604, |
|
"learning_rate": 2.1311475409836064e-05, |
|
"loss": 0.1551, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.8982300884955752, |
|
"grad_norm": 0.031158190220594406, |
|
"learning_rate": 2.040983606557377e-05, |
|
"loss": 0.4602, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.9469026548672566, |
|
"grad_norm": 0.0321173295378685, |
|
"learning_rate": 1.9508196721311475e-05, |
|
"loss": 0.1192, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.995575221238938, |
|
"grad_norm": 0.03802354261279106, |
|
"learning_rate": 1.860655737704918e-05, |
|
"loss": 0.1485, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.947136563876652, |
|
"eval_f1_macro": 0.8020900163344027, |
|
"eval_f1_micro": 0.947136563876652, |
|
"eval_f1_weighted": 0.944319826580159, |
|
"eval_loss": 0.315266877412796, |
|
"eval_precision_macro": 0.8020293882362848, |
|
"eval_precision_micro": 0.947136563876652, |
|
"eval_precision_weighted": 0.9424897852666347, |
|
"eval_recall_macro": 0.8030357142857143, |
|
"eval_recall_micro": 0.947136563876652, |
|
"eval_recall_weighted": 0.947136563876652, |
|
"eval_runtime": 39.853, |
|
"eval_samples_per_second": 5.696, |
|
"eval_steps_per_second": 1.43, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.0442477876106193, |
|
"grad_norm": 0.1867416501045227, |
|
"learning_rate": 1.7704918032786887e-05, |
|
"loss": 0.0035, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.0929203539823007, |
|
"grad_norm": 0.14578068256378174, |
|
"learning_rate": 1.680327868852459e-05, |
|
"loss": 0.9124, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.1415929203539825, |
|
"grad_norm": 926.0078735351562, |
|
"learning_rate": 1.5901639344262295e-05, |
|
"loss": 0.289, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.190265486725664, |
|
"grad_norm": 0.2907329797744751, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.539, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.2389380530973453, |
|
"grad_norm": 0.43732479214668274, |
|
"learning_rate": 1.4098360655737704e-05, |
|
"loss": 0.176, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.2876106194690267, |
|
"grad_norm": 0.008984439074993134, |
|
"learning_rate": 1.319672131147541e-05, |
|
"loss": 0.4674, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.336283185840708, |
|
"grad_norm": 0.022706633433699608, |
|
"learning_rate": 1.2295081967213116e-05, |
|
"loss": 0.4095, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.3849557522123894, |
|
"grad_norm": 0.14266431331634521, |
|
"learning_rate": 1.139344262295082e-05, |
|
"loss": 0.3441, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.433628318584071, |
|
"grad_norm": 25.22464370727539, |
|
"learning_rate": 1.0491803278688525e-05, |
|
"loss": 0.3304, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.482300884955752, |
|
"grad_norm": 0.20000261068344116, |
|
"learning_rate": 9.59016393442623e-06, |
|
"loss": 0.232, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.5309734513274336, |
|
"grad_norm": 0.049278829246759415, |
|
"learning_rate": 8.688524590163935e-06, |
|
"loss": 0.5132, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.579646017699115, |
|
"grad_norm": 0.10397324711084366, |
|
"learning_rate": 7.78688524590164e-06, |
|
"loss": 0.115, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.6283185840707963, |
|
"grad_norm": 0.019381407648324966, |
|
"learning_rate": 6.885245901639345e-06, |
|
"loss": 0.0654, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.676991150442478, |
|
"grad_norm": 0.008687891066074371, |
|
"learning_rate": 5.98360655737705e-06, |
|
"loss": 0.2235, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.725663716814159, |
|
"grad_norm": 0.029992764815688133, |
|
"learning_rate": 5.0819672131147545e-06, |
|
"loss": 0.2295, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.774336283185841, |
|
"grad_norm": 0.2019273042678833, |
|
"learning_rate": 4.180327868852459e-06, |
|
"loss": 0.2214, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.823008849557522, |
|
"grad_norm": 0.32915887236595154, |
|
"learning_rate": 3.278688524590164e-06, |
|
"loss": 0.0037, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.8716814159292037, |
|
"grad_norm": 0.023709949105978012, |
|
"learning_rate": 2.377049180327869e-06, |
|
"loss": 0.2387, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.920353982300885, |
|
"grad_norm": 0.06727021932601929, |
|
"learning_rate": 1.4754098360655739e-06, |
|
"loss": 0.1205, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.9690265486725664, |
|
"grad_norm": 1.9222813844680786, |
|
"learning_rate": 5.737704918032787e-07, |
|
"loss": 0.1217, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9515418502202643, |
|
"eval_f1_macro": 0.8093855588593053, |
|
"eval_f1_micro": 0.9515418502202643, |
|
"eval_f1_weighted": 0.949198754683482, |
|
"eval_loss": 0.28241145610809326, |
|
"eval_precision_macro": 0.8090277777777778, |
|
"eval_precision_micro": 0.9515418502202643, |
|
"eval_precision_weighted": 0.9473201174743024, |
|
"eval_recall_macro": 0.8100845864661653, |
|
"eval_recall_micro": 0.9515418502202643, |
|
"eval_recall_weighted": 0.9515418502202643, |
|
"eval_runtime": 38.4831, |
|
"eval_samples_per_second": 5.899, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1356 |
|
} |
|
], |
|
"logging_steps": 22, |
|
"max_steps": 1356, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 178397303961600.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|