{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6313, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0792016473942658, "grad_norm": 0.07919532805681229, "learning_rate": 3.9556962025316456e-05, "loss": 0.1311, "step": 500 }, { "epoch": 0.0792016473942658, "eval_loss": 0.014126550406217575, "eval_runtime": 83.0348, "eval_samples_per_second": 60.216, "eval_steps_per_second": 1.891, "eval_sts-dev_pearson_cosine": 0.1557480599999687, "eval_sts-dev_pearson_dot": 0.13799264585261115, "eval_sts-dev_pearson_euclidean": 0.18714752476831273, "eval_sts-dev_pearson_manhattan": 0.18855989459573652, "eval_sts-dev_pearson_max": 0.18855989459573652, "eval_sts-dev_spearman_cosine": 0.20357424121861373, "eval_sts-dev_spearman_dot": 0.14034776792350498, "eval_sts-dev_spearman_euclidean": 0.21044421396782537, "eval_sts-dev_spearman_manhattan": 0.21183531991746804, "eval_sts-dev_spearman_max": 0.21183531991746804, "step": 500 }, { "epoch": 0.1584032947885316, "grad_norm": 0.012879022397100925, "learning_rate": 4.677873613800388e-05, "loss": 0.0203, "step": 1000 }, { "epoch": 0.1584032947885316, "eval_loss": 0.015841394662857056, "eval_runtime": 82.5774, "eval_samples_per_second": 60.549, "eval_steps_per_second": 1.901, "eval_sts-dev_pearson_cosine": 0.16793876783774, "eval_sts-dev_pearson_dot": 0.1481957278796634, "eval_sts-dev_pearson_euclidean": 0.19722850814365245, "eval_sts-dev_pearson_manhattan": 0.19617097331605537, "eval_sts-dev_pearson_max": 0.19722850814365245, "eval_sts-dev_spearman_cosine": 0.19966948583403588, "eval_sts-dev_spearman_dot": 0.14869572921537724, "eval_sts-dev_spearman_euclidean": 0.208144651870388, "eval_sts-dev_spearman_manhattan": 0.20697988059772135, "eval_sts-dev_spearman_max": 0.208144651870388, "step": 1000 }, { "epoch": 0.2376049421827974, "grad_norm": 0.007155057042837143, "learning_rate": 4.2378102446752334e-05, "loss": 0.0174, "step": 1500 }, { "epoch": 0.2376049421827974, "eval_loss": 0.01738722249865532, "eval_runtime": 82.5636, "eval_samples_per_second": 60.559, "eval_steps_per_second": 1.902, "eval_sts-dev_pearson_cosine": 0.13663702381510662, "eval_sts-dev_pearson_dot": 0.11588852598989118, "eval_sts-dev_pearson_euclidean": 0.16056667152911408, "eval_sts-dev_pearson_manhattan": 0.15924268782656817, "eval_sts-dev_pearson_max": 0.16056667152911408, "eval_sts-dev_spearman_cosine": 0.16527466832006016, "eval_sts-dev_spearman_dot": 0.10078185264794931, "eval_sts-dev_spearman_euclidean": 0.1719215715191216, "eval_sts-dev_spearman_manhattan": 0.170303502778187, "eval_sts-dev_spearman_max": 0.1719215715191216, "step": 1500 }, { "epoch": 0.3168065895770632, "grad_norm": 0.002368535613641143, "learning_rate": 3.79774687555008e-05, "loss": 0.0108, "step": 2000 }, { "epoch": 0.3168065895770632, "eval_loss": 0.013646911829710007, "eval_runtime": 83.1873, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.887, "eval_sts-dev_pearson_cosine": 0.14207290429771552, "eval_sts-dev_pearson_dot": 0.12219302310028649, "eval_sts-dev_pearson_euclidean": 0.15249164642806468, "eval_sts-dev_pearson_manhattan": 0.15099460310457263, "eval_sts-dev_pearson_max": 0.15249164642806468, "eval_sts-dev_spearman_cosine": 0.14567413155731176, "eval_sts-dev_spearman_dot": 0.09939489795167657, "eval_sts-dev_spearman_euclidean": 0.14960035136962835, "eval_sts-dev_spearman_manhattan": 0.14872808741782187, "eval_sts-dev_spearman_max": 0.14960035136962835, "step": 2000 }, { "epoch": 0.39600823697132903, "grad_norm": 0.011403551325201988, "learning_rate": 3.3576835064249254e-05, "loss": 0.0121, "step": 2500 }, { "epoch": 0.39600823697132903, "eval_loss": 0.015611983835697174, "eval_runtime": 82.8829, "eval_samples_per_second": 60.326, "eval_steps_per_second": 1.894, "eval_sts-dev_pearson_cosine": 0.1786266334873075, "eval_sts-dev_pearson_dot": 0.16571459472812308, "eval_sts-dev_pearson_euclidean": 0.19586188718253267, "eval_sts-dev_pearson_manhattan": 0.19603778309890557, "eval_sts-dev_pearson_max": 0.19603778309890557, "eval_sts-dev_spearman_cosine": 0.20990140220242978, "eval_sts-dev_spearman_dot": 0.1668269410484095, "eval_sts-dev_spearman_euclidean": 0.21443661354146873, "eval_sts-dev_spearman_manhattan": 0.21529338637929912, "eval_sts-dev_spearman_max": 0.21529338637929912, "step": 2500 }, { "epoch": 0.4752098843655948, "grad_norm": 0.004168146755546331, "learning_rate": 2.9176201372997714e-05, "loss": 0.0122, "step": 3000 }, { "epoch": 0.4752098843655948, "eval_loss": 0.013952625915408134, "eval_runtime": 82.7861, "eval_samples_per_second": 60.397, "eval_steps_per_second": 1.896, "eval_sts-dev_pearson_cosine": 0.16089480397614714, "eval_sts-dev_pearson_dot": 0.14333252395560012, "eval_sts-dev_pearson_euclidean": 0.17458568236482797, "eval_sts-dev_pearson_manhattan": 0.1736800184097837, "eval_sts-dev_pearson_max": 0.17458568236482797, "eval_sts-dev_spearman_cosine": 0.17227083866593193, "eval_sts-dev_spearman_dot": 0.12781779851368713, "eval_sts-dev_spearman_euclidean": 0.17703810710585532, "eval_sts-dev_spearman_manhattan": 0.17557253669161538, "eval_sts-dev_spearman_max": 0.17703810710585532, "step": 3000 }, { "epoch": 0.5544115317598606, "grad_norm": 0.004282405134290457, "learning_rate": 2.4775567681746174e-05, "loss": 0.0125, "step": 3500 }, { "epoch": 0.5544115317598606, "eval_loss": 0.011783541180193424, "eval_runtime": 82.8159, "eval_samples_per_second": 60.375, "eval_steps_per_second": 1.896, "eval_sts-dev_pearson_cosine": 0.18448919166260044, "eval_sts-dev_pearson_dot": 0.17905699568214264, "eval_sts-dev_pearson_euclidean": 0.20624063360858977, "eval_sts-dev_pearson_manhattan": 0.205619351099398, "eval_sts-dev_pearson_max": 0.20624063360858977, "eval_sts-dev_spearman_cosine": 0.22476168122019577, "eval_sts-dev_spearman_dot": 0.19305224567026316, "eval_sts-dev_spearman_euclidean": 0.2268539691521332, "eval_sts-dev_spearman_manhattan": 0.2268021523901189, "eval_sts-dev_spearman_max": 0.2268539691521332, "step": 3500 }, { "epoch": 0.6336131791541264, "grad_norm": 0.016134686768054962, "learning_rate": 2.0374933990494634e-05, "loss": 0.0079, "step": 4000 }, { "epoch": 0.6336131791541264, "eval_loss": 0.011526196263730526, "eval_runtime": 83.1812, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.887, "eval_sts-dev_pearson_cosine": 0.16979017817169434, "eval_sts-dev_pearson_dot": 0.16329193511035556, "eval_sts-dev_pearson_euclidean": 0.2041557241070686, "eval_sts-dev_pearson_manhattan": 0.20415667390135622, "eval_sts-dev_pearson_max": 0.20415667390135622, "eval_sts-dev_spearman_cosine": 0.23370816253094054, "eval_sts-dev_spearman_dot": 0.19110938133669397, "eval_sts-dev_spearman_euclidean": 0.23731458674719166, "eval_sts-dev_spearman_manhattan": 0.2363744330684564, "eval_sts-dev_spearman_max": 0.23731458674719166, "step": 4000 }, { "epoch": 0.7128148265483922, "grad_norm": 0.10654988884925842, "learning_rate": 1.5983101566625593e-05, "loss": 0.0093, "step": 4500 }, { "epoch": 0.7128148265483922, "eval_loss": 0.01042733620852232, "eval_runtime": 82.4696, "eval_samples_per_second": 60.628, "eval_steps_per_second": 1.904, "eval_sts-dev_pearson_cosine": 0.16883832363197002, "eval_sts-dev_pearson_dot": 0.16172877878537467, "eval_sts-dev_pearson_euclidean": 0.20343671061551505, "eval_sts-dev_pearson_manhattan": 0.20269317144225543, "eval_sts-dev_pearson_max": 0.20343671061551505, "eval_sts-dev_spearman_cosine": 0.23314123864913222, "eval_sts-dev_spearman_dot": 0.19029123486315452, "eval_sts-dev_spearman_euclidean": 0.23747745874658102, "eval_sts-dev_spearman_manhattan": 0.23681371095402073, "eval_sts-dev_spearman_max": 0.23747745874658102, "step": 4500 }, { "epoch": 0.7920164739426581, "grad_norm": 0.0034745726734399796, "learning_rate": 1.1582467875374054e-05, "loss": 0.0071, "step": 5000 }, { "epoch": 0.7920164739426581, "eval_loss": 0.010719917714595795, "eval_runtime": 82.5902, "eval_samples_per_second": 60.54, "eval_steps_per_second": 1.901, "eval_sts-dev_pearson_cosine": 0.19565242771314767, "eval_sts-dev_pearson_dot": 0.18923012649171922, "eval_sts-dev_pearson_euclidean": 0.2278768429358364, "eval_sts-dev_pearson_manhattan": 0.22768897126347665, "eval_sts-dev_pearson_max": 0.2278768429358364, "eval_sts-dev_spearman_cosine": 0.24240094548214325, "eval_sts-dev_spearman_dot": 0.21284981986619678, "eval_sts-dev_spearman_euclidean": 0.24732163285243935, "eval_sts-dev_spearman_manhattan": 0.24736929658665446, "eval_sts-dev_spearman_max": 0.24736929658665446, "step": 5000 }, { "epoch": 0.8712181213369238, "grad_norm": 0.004818719811737537, "learning_rate": 7.181834184122514e-06, "loss": 0.0041, "step": 5500 }, { "epoch": 0.8712181213369238, "eval_loss": 0.009991911239922047, "eval_runtime": 82.1637, "eval_samples_per_second": 60.854, "eval_steps_per_second": 1.911, "eval_sts-dev_pearson_cosine": 0.1892693811813182, "eval_sts-dev_pearson_dot": 0.18290497740650222, "eval_sts-dev_pearson_euclidean": 0.22463164519842746, "eval_sts-dev_pearson_manhattan": 0.22460268853676083, "eval_sts-dev_pearson_max": 0.22463164519842746, "eval_sts-dev_spearman_cosine": 0.24628027091826607, "eval_sts-dev_spearman_dot": 0.21127633301239485, "eval_sts-dev_spearman_euclidean": 0.251238048387475, "eval_sts-dev_spearman_manhattan": 0.2518403299451181, "eval_sts-dev_spearman_max": 0.2518403299451181, "step": 5500 }, { "epoch": 0.9504197687311896, "grad_norm": 0.012634661048650742, "learning_rate": 2.7812004928709737e-06, "loss": 0.0069, "step": 6000 }, { "epoch": 0.9504197687311896, "eval_loss": 0.009837556630373001, "eval_runtime": 83.3155, "eval_samples_per_second": 60.013, "eval_steps_per_second": 1.884, "eval_sts-dev_pearson_cosine": 0.19373258731869963, "eval_sts-dev_pearson_dot": 0.18775862207030505, "eval_sts-dev_pearson_euclidean": 0.22537635202224982, "eval_sts-dev_pearson_manhattan": 0.2245827911400446, "eval_sts-dev_pearson_max": 0.22537635202224982, "eval_sts-dev_spearman_cosine": 0.24307341815427166, "eval_sts-dev_spearman_dot": 0.2124049530103558, "eval_sts-dev_spearman_euclidean": 0.24695143686545143, "eval_sts-dev_spearman_manhattan": 0.2468102784042943, "eval_sts-dev_spearman_max": 0.24695143686545143, "step": 6000 } ], "logging_steps": 500, "max_steps": 6313, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }