|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 6313, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0792016473942658, |
|
"grad_norm": 0.07919532805681229, |
|
"learning_rate": 3.9556962025316456e-05, |
|
"loss": 0.1311, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0792016473942658, |
|
"eval_loss": 0.014126550406217575, |
|
"eval_runtime": 83.0348, |
|
"eval_samples_per_second": 60.216, |
|
"eval_steps_per_second": 1.891, |
|
"eval_sts-dev_pearson_cosine": 0.1557480599999687, |
|
"eval_sts-dev_pearson_dot": 0.13799264585261115, |
|
"eval_sts-dev_pearson_euclidean": 0.18714752476831273, |
|
"eval_sts-dev_pearson_manhattan": 0.18855989459573652, |
|
"eval_sts-dev_pearson_max": 0.18855989459573652, |
|
"eval_sts-dev_spearman_cosine": 0.20357424121861373, |
|
"eval_sts-dev_spearman_dot": 0.14034776792350498, |
|
"eval_sts-dev_spearman_euclidean": 0.21044421396782537, |
|
"eval_sts-dev_spearman_manhattan": 0.21183531991746804, |
|
"eval_sts-dev_spearman_max": 0.21183531991746804, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1584032947885316, |
|
"grad_norm": 0.012879022397100925, |
|
"learning_rate": 4.677873613800388e-05, |
|
"loss": 0.0203, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1584032947885316, |
|
"eval_loss": 0.015841394662857056, |
|
"eval_runtime": 82.5774, |
|
"eval_samples_per_second": 60.549, |
|
"eval_steps_per_second": 1.901, |
|
"eval_sts-dev_pearson_cosine": 0.16793876783774, |
|
"eval_sts-dev_pearson_dot": 0.1481957278796634, |
|
"eval_sts-dev_pearson_euclidean": 0.19722850814365245, |
|
"eval_sts-dev_pearson_manhattan": 0.19617097331605537, |
|
"eval_sts-dev_pearson_max": 0.19722850814365245, |
|
"eval_sts-dev_spearman_cosine": 0.19966948583403588, |
|
"eval_sts-dev_spearman_dot": 0.14869572921537724, |
|
"eval_sts-dev_spearman_euclidean": 0.208144651870388, |
|
"eval_sts-dev_spearman_manhattan": 0.20697988059772135, |
|
"eval_sts-dev_spearman_max": 0.208144651870388, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2376049421827974, |
|
"grad_norm": 0.007155057042837143, |
|
"learning_rate": 4.2378102446752334e-05, |
|
"loss": 0.0174, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2376049421827974, |
|
"eval_loss": 0.01738722249865532, |
|
"eval_runtime": 82.5636, |
|
"eval_samples_per_second": 60.559, |
|
"eval_steps_per_second": 1.902, |
|
"eval_sts-dev_pearson_cosine": 0.13663702381510662, |
|
"eval_sts-dev_pearson_dot": 0.11588852598989118, |
|
"eval_sts-dev_pearson_euclidean": 0.16056667152911408, |
|
"eval_sts-dev_pearson_manhattan": 0.15924268782656817, |
|
"eval_sts-dev_pearson_max": 0.16056667152911408, |
|
"eval_sts-dev_spearman_cosine": 0.16527466832006016, |
|
"eval_sts-dev_spearman_dot": 0.10078185264794931, |
|
"eval_sts-dev_spearman_euclidean": 0.1719215715191216, |
|
"eval_sts-dev_spearman_manhattan": 0.170303502778187, |
|
"eval_sts-dev_spearman_max": 0.1719215715191216, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3168065895770632, |
|
"grad_norm": 0.002368535613641143, |
|
"learning_rate": 3.79774687555008e-05, |
|
"loss": 0.0108, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3168065895770632, |
|
"eval_loss": 0.013646911829710007, |
|
"eval_runtime": 83.1873, |
|
"eval_samples_per_second": 60.105, |
|
"eval_steps_per_second": 1.887, |
|
"eval_sts-dev_pearson_cosine": 0.14207290429771552, |
|
"eval_sts-dev_pearson_dot": 0.12219302310028649, |
|
"eval_sts-dev_pearson_euclidean": 0.15249164642806468, |
|
"eval_sts-dev_pearson_manhattan": 0.15099460310457263, |
|
"eval_sts-dev_pearson_max": 0.15249164642806468, |
|
"eval_sts-dev_spearman_cosine": 0.14567413155731176, |
|
"eval_sts-dev_spearman_dot": 0.09939489795167657, |
|
"eval_sts-dev_spearman_euclidean": 0.14960035136962835, |
|
"eval_sts-dev_spearman_manhattan": 0.14872808741782187, |
|
"eval_sts-dev_spearman_max": 0.14960035136962835, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39600823697132903, |
|
"grad_norm": 0.011403551325201988, |
|
"learning_rate": 3.3576835064249254e-05, |
|
"loss": 0.0121, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39600823697132903, |
|
"eval_loss": 0.015611983835697174, |
|
"eval_runtime": 82.8829, |
|
"eval_samples_per_second": 60.326, |
|
"eval_steps_per_second": 1.894, |
|
"eval_sts-dev_pearson_cosine": 0.1786266334873075, |
|
"eval_sts-dev_pearson_dot": 0.16571459472812308, |
|
"eval_sts-dev_pearson_euclidean": 0.19586188718253267, |
|
"eval_sts-dev_pearson_manhattan": 0.19603778309890557, |
|
"eval_sts-dev_pearson_max": 0.19603778309890557, |
|
"eval_sts-dev_spearman_cosine": 0.20990140220242978, |
|
"eval_sts-dev_spearman_dot": 0.1668269410484095, |
|
"eval_sts-dev_spearman_euclidean": 0.21443661354146873, |
|
"eval_sts-dev_spearman_manhattan": 0.21529338637929912, |
|
"eval_sts-dev_spearman_max": 0.21529338637929912, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4752098843655948, |
|
"grad_norm": 0.004168146755546331, |
|
"learning_rate": 2.9176201372997714e-05, |
|
"loss": 0.0122, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4752098843655948, |
|
"eval_loss": 0.013952625915408134, |
|
"eval_runtime": 82.7861, |
|
"eval_samples_per_second": 60.397, |
|
"eval_steps_per_second": 1.896, |
|
"eval_sts-dev_pearson_cosine": 0.16089480397614714, |
|
"eval_sts-dev_pearson_dot": 0.14333252395560012, |
|
"eval_sts-dev_pearson_euclidean": 0.17458568236482797, |
|
"eval_sts-dev_pearson_manhattan": 0.1736800184097837, |
|
"eval_sts-dev_pearson_max": 0.17458568236482797, |
|
"eval_sts-dev_spearman_cosine": 0.17227083866593193, |
|
"eval_sts-dev_spearman_dot": 0.12781779851368713, |
|
"eval_sts-dev_spearman_euclidean": 0.17703810710585532, |
|
"eval_sts-dev_spearman_manhattan": 0.17557253669161538, |
|
"eval_sts-dev_spearman_max": 0.17703810710585532, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5544115317598606, |
|
"grad_norm": 0.004282405134290457, |
|
"learning_rate": 2.4775567681746174e-05, |
|
"loss": 0.0125, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5544115317598606, |
|
"eval_loss": 0.011783541180193424, |
|
"eval_runtime": 82.8159, |
|
"eval_samples_per_second": 60.375, |
|
"eval_steps_per_second": 1.896, |
|
"eval_sts-dev_pearson_cosine": 0.18448919166260044, |
|
"eval_sts-dev_pearson_dot": 0.17905699568214264, |
|
"eval_sts-dev_pearson_euclidean": 0.20624063360858977, |
|
"eval_sts-dev_pearson_manhattan": 0.205619351099398, |
|
"eval_sts-dev_pearson_max": 0.20624063360858977, |
|
"eval_sts-dev_spearman_cosine": 0.22476168122019577, |
|
"eval_sts-dev_spearman_dot": 0.19305224567026316, |
|
"eval_sts-dev_spearman_euclidean": 0.2268539691521332, |
|
"eval_sts-dev_spearman_manhattan": 0.2268021523901189, |
|
"eval_sts-dev_spearman_max": 0.2268539691521332, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6336131791541264, |
|
"grad_norm": 0.016134686768054962, |
|
"learning_rate": 2.0374933990494634e-05, |
|
"loss": 0.0079, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6336131791541264, |
|
"eval_loss": 0.011526196263730526, |
|
"eval_runtime": 83.1812, |
|
"eval_samples_per_second": 60.11, |
|
"eval_steps_per_second": 1.887, |
|
"eval_sts-dev_pearson_cosine": 0.16979017817169434, |
|
"eval_sts-dev_pearson_dot": 0.16329193511035556, |
|
"eval_sts-dev_pearson_euclidean": 0.2041557241070686, |
|
"eval_sts-dev_pearson_manhattan": 0.20415667390135622, |
|
"eval_sts-dev_pearson_max": 0.20415667390135622, |
|
"eval_sts-dev_spearman_cosine": 0.23370816253094054, |
|
"eval_sts-dev_spearman_dot": 0.19110938133669397, |
|
"eval_sts-dev_spearman_euclidean": 0.23731458674719166, |
|
"eval_sts-dev_spearman_manhattan": 0.2363744330684564, |
|
"eval_sts-dev_spearman_max": 0.23731458674719166, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7128148265483922, |
|
"grad_norm": 0.10654988884925842, |
|
"learning_rate": 1.5983101566625593e-05, |
|
"loss": 0.0093, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7128148265483922, |
|
"eval_loss": 0.01042733620852232, |
|
"eval_runtime": 82.4696, |
|
"eval_samples_per_second": 60.628, |
|
"eval_steps_per_second": 1.904, |
|
"eval_sts-dev_pearson_cosine": 0.16883832363197002, |
|
"eval_sts-dev_pearson_dot": 0.16172877878537467, |
|
"eval_sts-dev_pearson_euclidean": 0.20343671061551505, |
|
"eval_sts-dev_pearson_manhattan": 0.20269317144225543, |
|
"eval_sts-dev_pearson_max": 0.20343671061551505, |
|
"eval_sts-dev_spearman_cosine": 0.23314123864913222, |
|
"eval_sts-dev_spearman_dot": 0.19029123486315452, |
|
"eval_sts-dev_spearman_euclidean": 0.23747745874658102, |
|
"eval_sts-dev_spearman_manhattan": 0.23681371095402073, |
|
"eval_sts-dev_spearman_max": 0.23747745874658102, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7920164739426581, |
|
"grad_norm": 0.0034745726734399796, |
|
"learning_rate": 1.1582467875374054e-05, |
|
"loss": 0.0071, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7920164739426581, |
|
"eval_loss": 0.010719917714595795, |
|
"eval_runtime": 82.5902, |
|
"eval_samples_per_second": 60.54, |
|
"eval_steps_per_second": 1.901, |
|
"eval_sts-dev_pearson_cosine": 0.19565242771314767, |
|
"eval_sts-dev_pearson_dot": 0.18923012649171922, |
|
"eval_sts-dev_pearson_euclidean": 0.2278768429358364, |
|
"eval_sts-dev_pearson_manhattan": 0.22768897126347665, |
|
"eval_sts-dev_pearson_max": 0.2278768429358364, |
|
"eval_sts-dev_spearman_cosine": 0.24240094548214325, |
|
"eval_sts-dev_spearman_dot": 0.21284981986619678, |
|
"eval_sts-dev_spearman_euclidean": 0.24732163285243935, |
|
"eval_sts-dev_spearman_manhattan": 0.24736929658665446, |
|
"eval_sts-dev_spearman_max": 0.24736929658665446, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8712181213369238, |
|
"grad_norm": 0.004818719811737537, |
|
"learning_rate": 7.181834184122514e-06, |
|
"loss": 0.0041, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8712181213369238, |
|
"eval_loss": 0.009991911239922047, |
|
"eval_runtime": 82.1637, |
|
"eval_samples_per_second": 60.854, |
|
"eval_steps_per_second": 1.911, |
|
"eval_sts-dev_pearson_cosine": 0.1892693811813182, |
|
"eval_sts-dev_pearson_dot": 0.18290497740650222, |
|
"eval_sts-dev_pearson_euclidean": 0.22463164519842746, |
|
"eval_sts-dev_pearson_manhattan": 0.22460268853676083, |
|
"eval_sts-dev_pearson_max": 0.22463164519842746, |
|
"eval_sts-dev_spearman_cosine": 0.24628027091826607, |
|
"eval_sts-dev_spearman_dot": 0.21127633301239485, |
|
"eval_sts-dev_spearman_euclidean": 0.251238048387475, |
|
"eval_sts-dev_spearman_manhattan": 0.2518403299451181, |
|
"eval_sts-dev_spearman_max": 0.2518403299451181, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9504197687311896, |
|
"grad_norm": 0.012634661048650742, |
|
"learning_rate": 2.7812004928709737e-06, |
|
"loss": 0.0069, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9504197687311896, |
|
"eval_loss": 0.009837556630373001, |
|
"eval_runtime": 83.3155, |
|
"eval_samples_per_second": 60.013, |
|
"eval_steps_per_second": 1.884, |
|
"eval_sts-dev_pearson_cosine": 0.19373258731869963, |
|
"eval_sts-dev_pearson_dot": 0.18775862207030505, |
|
"eval_sts-dev_pearson_euclidean": 0.22537635202224982, |
|
"eval_sts-dev_pearson_manhattan": 0.2245827911400446, |
|
"eval_sts-dev_pearson_max": 0.22537635202224982, |
|
"eval_sts-dev_spearman_cosine": 0.24307341815427166, |
|
"eval_sts-dev_spearman_dot": 0.2124049530103558, |
|
"eval_sts-dev_spearman_euclidean": 0.24695143686545143, |
|
"eval_sts-dev_spearman_manhattan": 0.2468102784042943, |
|
"eval_sts-dev_spearman_max": 0.24695143686545143, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6313, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|