|
{ |
|
"best_metric": 0.7926363320597429, |
|
"best_model_checkpoint": "logs/indian_build_rr/MHGanainy/xmod-shared-roberta-base-legal-multi/seed_1/checkpoint-992", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 1364, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7460923931920805, |
|
"eval_loss": 0.8663367033004761, |
|
"eval_macro-f1": 0.47795631478138917, |
|
"eval_micro-f1": 0.7460923931920805, |
|
"eval_precision-macro": 0.537033221622001, |
|
"eval_precision-micro": 0.7460923931920805, |
|
"eval_recall-macro": 0.46833031643592427, |
|
"eval_recall-micro": 0.7460923931920805, |
|
"eval_runtime": 4.1397, |
|
"eval_samples_per_second": 7.247, |
|
"eval_steps_per_second": 3.623, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6981590830149358, |
|
"eval_loss": 0.9012688398361206, |
|
"eval_macro-f1": 0.5023050615982335, |
|
"eval_micro-f1": 0.6981590830149358, |
|
"eval_precision-macro": 0.49382932128709284, |
|
"eval_precision-micro": 0.6981590830149358, |
|
"eval_recall-macro": 0.5409271662176228, |
|
"eval_recall-micro": 0.6981590830149358, |
|
"eval_runtime": 4.0156, |
|
"eval_samples_per_second": 7.471, |
|
"eval_steps_per_second": 3.735, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7634595345606113, |
|
"eval_loss": 0.7796859741210938, |
|
"eval_macro-f1": 0.5415425168075806, |
|
"eval_micro-f1": 0.7634595345606113, |
|
"eval_precision-macro": 0.576524112938428, |
|
"eval_precision-micro": 0.7634595345606113, |
|
"eval_recall-macro": 0.5450809655959412, |
|
"eval_recall-micro": 0.7634595345606113, |
|
"eval_runtime": 4.0974, |
|
"eval_samples_per_second": 7.322, |
|
"eval_steps_per_second": 3.661, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.771795762417506, |
|
"eval_loss": 0.7202845215797424, |
|
"eval_macro-f1": 0.5408809683499423, |
|
"eval_micro-f1": 0.771795762417506, |
|
"eval_precision-macro": 0.6529799444819455, |
|
"eval_precision-micro": 0.771795762417506, |
|
"eval_recall-macro": 0.547754417411679, |
|
"eval_recall-micro": 0.771795762417506, |
|
"eval_runtime": 4.0517, |
|
"eval_samples_per_second": 7.404, |
|
"eval_steps_per_second": 3.702, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 4.032258064516129, |
|
"grad_norm": 6.1605224609375, |
|
"learning_rate": 2.398790322580645e-05, |
|
"loss": 0.9675, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7777005904828065, |
|
"eval_loss": 0.7465451955795288, |
|
"eval_macro-f1": 0.5866418390333193, |
|
"eval_micro-f1": 0.7777005904828065, |
|
"eval_precision-macro": 0.5984466979286676, |
|
"eval_precision-micro": 0.7777005904828065, |
|
"eval_recall-macro": 0.5960188099213857, |
|
"eval_recall-micro": 0.7777005904828065, |
|
"eval_runtime": 4.1357, |
|
"eval_samples_per_second": 7.254, |
|
"eval_steps_per_second": 3.627, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.779089961792289, |
|
"eval_loss": 0.7502714991569519, |
|
"eval_macro-f1": 0.5699431692507972, |
|
"eval_micro-f1": 0.779089961792289, |
|
"eval_precision-macro": 0.613407043792223, |
|
"eval_precision-micro": 0.779089961792289, |
|
"eval_recall-macro": 0.5692211367836036, |
|
"eval_recall-micro": 0.779089961792289, |
|
"eval_runtime": 3.9786, |
|
"eval_samples_per_second": 7.54, |
|
"eval_steps_per_second": 3.77, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7863841611670719, |
|
"eval_loss": 0.766509473323822, |
|
"eval_macro-f1": 0.5877444821246216, |
|
"eval_micro-f1": 0.7863841611670719, |
|
"eval_precision-macro": 0.6552426757086595, |
|
"eval_precision-micro": 0.7863841611670719, |
|
"eval_recall-macro": 0.5731587593411557, |
|
"eval_recall-micro": 0.7863841611670719, |
|
"eval_runtime": 4.3188, |
|
"eval_samples_per_second": 6.946, |
|
"eval_steps_per_second": 3.473, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7926363320597429, |
|
"eval_loss": 0.765051007270813, |
|
"eval_macro-f1": 0.5936857133353053, |
|
"eval_micro-f1": 0.7926363320597429, |
|
"eval_precision-macro": 0.6253302610595257, |
|
"eval_precision-micro": 0.7926363320597429, |
|
"eval_recall-macro": 0.5880039852948196, |
|
"eval_recall-micro": 0.7926363320597429, |
|
"eval_runtime": 4.1192, |
|
"eval_samples_per_second": 7.283, |
|
"eval_steps_per_second": 3.641, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 8.064516129032258, |
|
"grad_norm": 4.945093154907227, |
|
"learning_rate": 1.793951612903226e-05, |
|
"loss": 0.5065, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7766585620006947, |
|
"eval_loss": 0.8559873104095459, |
|
"eval_macro-f1": 0.594537265696571, |
|
"eval_micro-f1": 0.7766585620006947, |
|
"eval_precision-macro": 0.6074559744964598, |
|
"eval_precision-micro": 0.7766585620006947, |
|
"eval_recall-macro": 0.5930428516685216, |
|
"eval_recall-micro": 0.7766585620006947, |
|
"eval_runtime": 4.1612, |
|
"eval_samples_per_second": 7.209, |
|
"eval_steps_per_second": 3.605, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7902049322681487, |
|
"eval_loss": 0.8643214106559753, |
|
"eval_macro-f1": 0.5971614075338256, |
|
"eval_micro-f1": 0.7902049322681487, |
|
"eval_precision-macro": 0.6353648381926511, |
|
"eval_precision-micro": 0.7902049322681487, |
|
"eval_recall-macro": 0.5841529326694309, |
|
"eval_recall-micro": 0.7902049322681487, |
|
"eval_runtime": 4.3374, |
|
"eval_samples_per_second": 6.917, |
|
"eval_steps_per_second": 3.458, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7895102466134074, |
|
"eval_loss": 0.9258854985237122, |
|
"eval_macro-f1": 0.6020711019594975, |
|
"eval_micro-f1": 0.7895102466134074, |
|
"eval_precision-macro": 0.6337053537013936, |
|
"eval_precision-micro": 0.7895102466134074, |
|
"eval_recall-macro": 0.5883671234037948, |
|
"eval_recall-micro": 0.7895102466134074, |
|
"eval_runtime": 3.9857, |
|
"eval_samples_per_second": 7.527, |
|
"eval_steps_per_second": 3.763, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 1364, |
|
"total_flos": 5.68024429585367e+16, |
|
"train_loss": 0.6171089015748145, |
|
"train_runtime": 1002.5092, |
|
"train_samples_per_second": 4.928, |
|
"train_steps_per_second": 2.474 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.68024429585367e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|