{
  "best_metric": 0.3079245686531067,
  "best_model_checkpoint": "autotrain-kcpio-wclqs/checkpoint-712",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 712,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0351123595505618,
      "grad_norm": 1.9374197721481323,
      "learning_rate": 1.736111111111111e-05,
      "loss": 0.6963,
      "step": 25
    },
    {
      "epoch": 0.0702247191011236,
      "grad_norm": 3.9908769130706787,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.5465,
      "step": 50
    },
    {
      "epoch": 0.10533707865168539,
      "grad_norm": 8.669546127319336,
      "learning_rate": 4.930555555555556e-05,
      "loss": 0.4025,
      "step": 75
    },
    {
      "epoch": 0.1404494382022472,
      "grad_norm": 0.6609323024749756,
      "learning_rate": 4.8125000000000004e-05,
      "loss": 0.338,
      "step": 100
    },
    {
      "epoch": 0.175561797752809,
      "grad_norm": 0.6923323273658752,
      "learning_rate": 4.6171875e-05,
      "loss": 0.265,
      "step": 125
    },
    {
      "epoch": 0.21067415730337077,
      "grad_norm": 0.6821861267089844,
      "learning_rate": 4.421875e-05,
      "loss": 0.2498,
      "step": 150
    },
    {
      "epoch": 0.24578651685393257,
      "grad_norm": 0.10477828234434128,
      "learning_rate": 4.2265625000000006e-05,
      "loss": 0.1785,
      "step": 175
    },
    {
      "epoch": 0.2808988764044944,
      "grad_norm": 3.7019550800323486,
      "learning_rate": 4.0312500000000004e-05,
      "loss": 0.2335,
      "step": 200
    },
    {
      "epoch": 0.3160112359550562,
      "grad_norm": 16.70686912536621,
      "learning_rate": 3.8359375e-05,
      "loss": 0.1422,
      "step": 225
    },
    {
      "epoch": 0.351123595505618,
      "grad_norm": 0.07099178433418274,
      "learning_rate": 3.640625e-05,
      "loss": 0.1525,
      "step": 250
    },
    {
      "epoch": 0.3862359550561798,
      "grad_norm": 0.049899421632289886,
      "learning_rate": 3.4453125000000006e-05,
      "loss": 0.0916,
      "step": 275
    },
    {
      "epoch": 0.42134831460674155,
      "grad_norm": 0.021365324035286903,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.0504,
      "step": 300
    },
    {
      "epoch": 0.45646067415730335,
      "grad_norm": 0.03689989075064659,
      "learning_rate": 3.0546875e-05,
      "loss": 0.09,
      "step": 325
    },
    {
      "epoch": 0.49157303370786515,
      "grad_norm": 0.24886544048786163,
      "learning_rate": 2.8593750000000004e-05,
      "loss": 0.0951,
      "step": 350
    },
    {
      "epoch": 0.526685393258427,
      "grad_norm": 0.06867190450429916,
      "learning_rate": 2.6640625000000002e-05,
      "loss": 0.0483,
      "step": 375
    },
    {
      "epoch": 0.5617977528089888,
      "grad_norm": 0.29167991876602173,
      "learning_rate": 2.4687500000000004e-05,
      "loss": 0.1251,
      "step": 400
    },
    {
      "epoch": 0.5969101123595506,
      "grad_norm": 0.10820268094539642,
      "learning_rate": 2.2734375000000002e-05,
      "loss": 0.1432,
      "step": 425
    },
    {
      "epoch": 0.6320224719101124,
      "grad_norm": 0.07357452809810638,
      "learning_rate": 2.0781250000000004e-05,
      "loss": 0.0941,
      "step": 450
    },
    {
      "epoch": 0.6671348314606742,
      "grad_norm": 0.13083012402057648,
      "learning_rate": 1.8828125000000002e-05,
      "loss": 0.1754,
      "step": 475
    },
    {
      "epoch": 0.702247191011236,
      "grad_norm": 0.11591902375221252,
      "learning_rate": 1.6875000000000004e-05,
      "loss": 0.1484,
      "step": 500
    },
    {
      "epoch": 0.7373595505617978,
      "grad_norm": 23.016658782958984,
      "learning_rate": 1.4921875000000002e-05,
      "loss": 0.1112,
      "step": 525
    },
    {
      "epoch": 0.7724719101123596,
      "grad_norm": 29.17462158203125,
      "learning_rate": 1.2968750000000002e-05,
      "loss": 0.0749,
      "step": 550
    },
    {
      "epoch": 0.8075842696629213,
      "grad_norm": 0.27380064129829407,
      "learning_rate": 1.1015625e-05,
      "loss": 0.1224,
      "step": 575
    },
    {
      "epoch": 0.8426966292134831,
      "grad_norm": 0.11491677910089493,
      "learning_rate": 9.0625e-06,
      "loss": 0.04,
      "step": 600
    },
    {
      "epoch": 0.8778089887640449,
      "grad_norm": 0.04559633880853653,
      "learning_rate": 7.109375e-06,
      "loss": 0.032,
      "step": 625
    },
    {
      "epoch": 0.9129213483146067,
      "grad_norm": 0.12071628123521805,
      "learning_rate": 5.15625e-06,
      "loss": 0.1393,
      "step": 650
    },
    {
      "epoch": 0.9480337078651685,
      "grad_norm": 0.14503008127212524,
      "learning_rate": 3.203125e-06,
      "loss": 0.0517,
      "step": 675
    },
    {
      "epoch": 0.9831460674157303,
      "grad_norm": 0.045122675597667694,
      "learning_rate": 1.25e-06,
      "loss": 0.0517,
      "step": 700
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9334958217270195,
      "eval_auc": 0.9840253557545333,
      "eval_f1": 0.9307469180565627,
      "eval_loss": 0.3079245686531067,
      "eval_precision": 0.9708774583963692,
      "eval_recall": 0.8938022284122563,
      "eval_runtime": 20.2322,
      "eval_samples_per_second": 283.904,
      "eval_steps_per_second": 17.744,
      "step": 712
    }
  ],
  "logging_steps": 25,
  "max_steps": 712,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 374413746333696.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}