|
{ |
|
"best_metric": 0.005880220327526331, |
|
"best_model_checkpoint": "autotrain-a1ahc-punm7/checkpoint-50", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 192.0443115234375, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6486, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 91.13789367675781, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3726, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 86.33133697509766, |
|
"learning_rate": 6e-05, |
|
"loss": 1.529, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 70.01477813720703, |
|
"learning_rate": 8e-05, |
|
"loss": 1.4176, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.017295703291893005, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.1406904011964798, |
|
"learning_rate": 9.777777777777778e-05, |
|
"loss": 0.0005, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.0068831657990813255, |
|
"learning_rate": 9.555555555555557e-05, |
|
"loss": 0.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1597442626953125, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.0006, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 76.26848602294922, |
|
"learning_rate": 9.111111111111112e-05, |
|
"loss": 0.9406, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.322993719251826e-05, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9473684210526315, |
|
"eval_auc": 0.9444444444444444, |
|
"eval_f1": 0.972972972972973, |
|
"eval_loss": 0.5396547317504883, |
|
"eval_precision": 0.9473684210526315, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 6.4147, |
|
"eval_samples_per_second": 2.962, |
|
"eval_steps_per_second": 0.312, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.002093537012115121, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.6939017176628113, |
|
"learning_rate": 8.444444444444444e-05, |
|
"loss": 0.0034, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 27.010156631469727, |
|
"learning_rate": 8.222222222222222e-05, |
|
"loss": 0.3681, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.08944934606552124, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0004, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.00046487816143780947, |
|
"learning_rate": 7.777777777777778e-05, |
|
"loss": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 26.83158302307129, |
|
"learning_rate": 7.555555555555556e-05, |
|
"loss": 0.3384, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 22.95646858215332, |
|
"learning_rate": 7.333333333333333e-05, |
|
"loss": 0.4147, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 12.867383003234863, |
|
"learning_rate": 7.111111111111112e-05, |
|
"loss": 0.101, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.18528755009174347, |
|
"learning_rate": 6.88888888888889e-05, |
|
"loss": 0.0011, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.5274847745895386, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0101, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9473684210526315, |
|
"eval_auc": 1.0, |
|
"eval_f1": 0.9714285714285714, |
|
"eval_loss": 0.10451264679431915, |
|
"eval_precision": 1.0, |
|
"eval_recall": 0.9444444444444444, |
|
"eval_runtime": 7.0278, |
|
"eval_samples_per_second": 2.704, |
|
"eval_steps_per_second": 0.285, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 46.64225769042969, |
|
"learning_rate": 6.444444444444446e-05, |
|
"loss": 1.3124, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 39.809120178222656, |
|
"learning_rate": 6.222222222222222e-05, |
|
"loss": 1.2012, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 7.132116794586182, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0641, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 8.217597961425781, |
|
"learning_rate": 5.7777777777777776e-05, |
|
"loss": 0.1189, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 3.328246593475342, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.0219, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.018878834322094917, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.0002, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.05963844433426857, |
|
"learning_rate": 5.111111111111111e-05, |
|
"loss": 0.0003, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.0006083645857870579, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 1.1290885210037231, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.0058, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.0002643383922986686, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9473684210526315, |
|
"eval_auc": 1.0, |
|
"eval_f1": 0.972972972972973, |
|
"eval_loss": 0.34398871660232544, |
|
"eval_precision": 0.9473684210526315, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 7.6262, |
|
"eval_samples_per_second": 2.491, |
|
"eval_steps_per_second": 0.262, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.000997014343738556, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.0017871842719614506, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 125.35645294189453, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.4944, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 25.963590621948242, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 1.3397, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 48.228271484375, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6604, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 8.783335943007842e-07, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 44.1061897277832, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 1.9752, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 28.28629493713379, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.6833, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 1.6506115571246482e-05, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0209506805986166, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9473684210526315, |
|
"eval_auc": 1.0, |
|
"eval_f1": 0.972972972972973, |
|
"eval_loss": 0.07567384093999863, |
|
"eval_precision": 0.9473684210526315, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 6.5662, |
|
"eval_samples_per_second": 2.894, |
|
"eval_steps_per_second": 0.305, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 22.53162384033203, |
|
"learning_rate": 2e-05, |
|
"loss": 0.179, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.47764596343040466, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.0018, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 5.29976277903188e-05, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 5.991899490356445, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0763, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 8.715898002265021e-05, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 5.479080573422834e-05, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 13.429327964782715, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.2454, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 1.9549882411956787, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.0267, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.031847428530454636, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.0001, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.6024940805436927e-06, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 1.0, |
|
"eval_auc": 1.0, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.005880220327526331, |
|
"eval_precision": 1.0, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 7.0124, |
|
"eval_samples_per_second": 2.709, |
|
"eval_steps_per_second": 0.285, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.898785558237184e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|