File size: 12,448 Bytes
8495761 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 6313,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0792016473942658,
"grad_norm": 0.07919532805681229,
"learning_rate": 3.9556962025316456e-05,
"loss": 0.1311,
"step": 500
},
{
"epoch": 0.0792016473942658,
"eval_loss": 0.014126550406217575,
"eval_runtime": 83.0348,
"eval_samples_per_second": 60.216,
"eval_steps_per_second": 1.891,
"eval_sts-dev_pearson_cosine": 0.1557480599999687,
"eval_sts-dev_pearson_dot": 0.13799264585261115,
"eval_sts-dev_pearson_euclidean": 0.18714752476831273,
"eval_sts-dev_pearson_manhattan": 0.18855989459573652,
"eval_sts-dev_pearson_max": 0.18855989459573652,
"eval_sts-dev_spearman_cosine": 0.20357424121861373,
"eval_sts-dev_spearman_dot": 0.14034776792350498,
"eval_sts-dev_spearman_euclidean": 0.21044421396782537,
"eval_sts-dev_spearman_manhattan": 0.21183531991746804,
"eval_sts-dev_spearman_max": 0.21183531991746804,
"step": 500
},
{
"epoch": 0.1584032947885316,
"grad_norm": 0.012879022397100925,
"learning_rate": 4.677873613800388e-05,
"loss": 0.0203,
"step": 1000
},
{
"epoch": 0.1584032947885316,
"eval_loss": 0.015841394662857056,
"eval_runtime": 82.5774,
"eval_samples_per_second": 60.549,
"eval_steps_per_second": 1.901,
"eval_sts-dev_pearson_cosine": 0.16793876783774,
"eval_sts-dev_pearson_dot": 0.1481957278796634,
"eval_sts-dev_pearson_euclidean": 0.19722850814365245,
"eval_sts-dev_pearson_manhattan": 0.19617097331605537,
"eval_sts-dev_pearson_max": 0.19722850814365245,
"eval_sts-dev_spearman_cosine": 0.19966948583403588,
"eval_sts-dev_spearman_dot": 0.14869572921537724,
"eval_sts-dev_spearman_euclidean": 0.208144651870388,
"eval_sts-dev_spearman_manhattan": 0.20697988059772135,
"eval_sts-dev_spearman_max": 0.208144651870388,
"step": 1000
},
{
"epoch": 0.2376049421827974,
"grad_norm": 0.007155057042837143,
"learning_rate": 4.2378102446752334e-05,
"loss": 0.0174,
"step": 1500
},
{
"epoch": 0.2376049421827974,
"eval_loss": 0.01738722249865532,
"eval_runtime": 82.5636,
"eval_samples_per_second": 60.559,
"eval_steps_per_second": 1.902,
"eval_sts-dev_pearson_cosine": 0.13663702381510662,
"eval_sts-dev_pearson_dot": 0.11588852598989118,
"eval_sts-dev_pearson_euclidean": 0.16056667152911408,
"eval_sts-dev_pearson_manhattan": 0.15924268782656817,
"eval_sts-dev_pearson_max": 0.16056667152911408,
"eval_sts-dev_spearman_cosine": 0.16527466832006016,
"eval_sts-dev_spearman_dot": 0.10078185264794931,
"eval_sts-dev_spearman_euclidean": 0.1719215715191216,
"eval_sts-dev_spearman_manhattan": 0.170303502778187,
"eval_sts-dev_spearman_max": 0.1719215715191216,
"step": 1500
},
{
"epoch": 0.3168065895770632,
"grad_norm": 0.002368535613641143,
"learning_rate": 3.79774687555008e-05,
"loss": 0.0108,
"step": 2000
},
{
"epoch": 0.3168065895770632,
"eval_loss": 0.013646911829710007,
"eval_runtime": 83.1873,
"eval_samples_per_second": 60.105,
"eval_steps_per_second": 1.887,
"eval_sts-dev_pearson_cosine": 0.14207290429771552,
"eval_sts-dev_pearson_dot": 0.12219302310028649,
"eval_sts-dev_pearson_euclidean": 0.15249164642806468,
"eval_sts-dev_pearson_manhattan": 0.15099460310457263,
"eval_sts-dev_pearson_max": 0.15249164642806468,
"eval_sts-dev_spearman_cosine": 0.14567413155731176,
"eval_sts-dev_spearman_dot": 0.09939489795167657,
"eval_sts-dev_spearman_euclidean": 0.14960035136962835,
"eval_sts-dev_spearman_manhattan": 0.14872808741782187,
"eval_sts-dev_spearman_max": 0.14960035136962835,
"step": 2000
},
{
"epoch": 0.39600823697132903,
"grad_norm": 0.011403551325201988,
"learning_rate": 3.3576835064249254e-05,
"loss": 0.0121,
"step": 2500
},
{
"epoch": 0.39600823697132903,
"eval_loss": 0.015611983835697174,
"eval_runtime": 82.8829,
"eval_samples_per_second": 60.326,
"eval_steps_per_second": 1.894,
"eval_sts-dev_pearson_cosine": 0.1786266334873075,
"eval_sts-dev_pearson_dot": 0.16571459472812308,
"eval_sts-dev_pearson_euclidean": 0.19586188718253267,
"eval_sts-dev_pearson_manhattan": 0.19603778309890557,
"eval_sts-dev_pearson_max": 0.19603778309890557,
"eval_sts-dev_spearman_cosine": 0.20990140220242978,
"eval_sts-dev_spearman_dot": 0.1668269410484095,
"eval_sts-dev_spearman_euclidean": 0.21443661354146873,
"eval_sts-dev_spearman_manhattan": 0.21529338637929912,
"eval_sts-dev_spearman_max": 0.21529338637929912,
"step": 2500
},
{
"epoch": 0.4752098843655948,
"grad_norm": 0.004168146755546331,
"learning_rate": 2.9176201372997714e-05,
"loss": 0.0122,
"step": 3000
},
{
"epoch": 0.4752098843655948,
"eval_loss": 0.013952625915408134,
"eval_runtime": 82.7861,
"eval_samples_per_second": 60.397,
"eval_steps_per_second": 1.896,
"eval_sts-dev_pearson_cosine": 0.16089480397614714,
"eval_sts-dev_pearson_dot": 0.14333252395560012,
"eval_sts-dev_pearson_euclidean": 0.17458568236482797,
"eval_sts-dev_pearson_manhattan": 0.1736800184097837,
"eval_sts-dev_pearson_max": 0.17458568236482797,
"eval_sts-dev_spearman_cosine": 0.17227083866593193,
"eval_sts-dev_spearman_dot": 0.12781779851368713,
"eval_sts-dev_spearman_euclidean": 0.17703810710585532,
"eval_sts-dev_spearman_manhattan": 0.17557253669161538,
"eval_sts-dev_spearman_max": 0.17703810710585532,
"step": 3000
},
{
"epoch": 0.5544115317598606,
"grad_norm": 0.004282405134290457,
"learning_rate": 2.4775567681746174e-05,
"loss": 0.0125,
"step": 3500
},
{
"epoch": 0.5544115317598606,
"eval_loss": 0.011783541180193424,
"eval_runtime": 82.8159,
"eval_samples_per_second": 60.375,
"eval_steps_per_second": 1.896,
"eval_sts-dev_pearson_cosine": 0.18448919166260044,
"eval_sts-dev_pearson_dot": 0.17905699568214264,
"eval_sts-dev_pearson_euclidean": 0.20624063360858977,
"eval_sts-dev_pearson_manhattan": 0.205619351099398,
"eval_sts-dev_pearson_max": 0.20624063360858977,
"eval_sts-dev_spearman_cosine": 0.22476168122019577,
"eval_sts-dev_spearman_dot": 0.19305224567026316,
"eval_sts-dev_spearman_euclidean": 0.2268539691521332,
"eval_sts-dev_spearman_manhattan": 0.2268021523901189,
"eval_sts-dev_spearman_max": 0.2268539691521332,
"step": 3500
},
{
"epoch": 0.6336131791541264,
"grad_norm": 0.016134686768054962,
"learning_rate": 2.0374933990494634e-05,
"loss": 0.0079,
"step": 4000
},
{
"epoch": 0.6336131791541264,
"eval_loss": 0.011526196263730526,
"eval_runtime": 83.1812,
"eval_samples_per_second": 60.11,
"eval_steps_per_second": 1.887,
"eval_sts-dev_pearson_cosine": 0.16979017817169434,
"eval_sts-dev_pearson_dot": 0.16329193511035556,
"eval_sts-dev_pearson_euclidean": 0.2041557241070686,
"eval_sts-dev_pearson_manhattan": 0.20415667390135622,
"eval_sts-dev_pearson_max": 0.20415667390135622,
"eval_sts-dev_spearman_cosine": 0.23370816253094054,
"eval_sts-dev_spearman_dot": 0.19110938133669397,
"eval_sts-dev_spearman_euclidean": 0.23731458674719166,
"eval_sts-dev_spearman_manhattan": 0.2363744330684564,
"eval_sts-dev_spearman_max": 0.23731458674719166,
"step": 4000
},
{
"epoch": 0.7128148265483922,
"grad_norm": 0.10654988884925842,
"learning_rate": 1.5983101566625593e-05,
"loss": 0.0093,
"step": 4500
},
{
"epoch": 0.7128148265483922,
"eval_loss": 0.01042733620852232,
"eval_runtime": 82.4696,
"eval_samples_per_second": 60.628,
"eval_steps_per_second": 1.904,
"eval_sts-dev_pearson_cosine": 0.16883832363197002,
"eval_sts-dev_pearson_dot": 0.16172877878537467,
"eval_sts-dev_pearson_euclidean": 0.20343671061551505,
"eval_sts-dev_pearson_manhattan": 0.20269317144225543,
"eval_sts-dev_pearson_max": 0.20343671061551505,
"eval_sts-dev_spearman_cosine": 0.23314123864913222,
"eval_sts-dev_spearman_dot": 0.19029123486315452,
"eval_sts-dev_spearman_euclidean": 0.23747745874658102,
"eval_sts-dev_spearman_manhattan": 0.23681371095402073,
"eval_sts-dev_spearman_max": 0.23747745874658102,
"step": 4500
},
{
"epoch": 0.7920164739426581,
"grad_norm": 0.0034745726734399796,
"learning_rate": 1.1582467875374054e-05,
"loss": 0.0071,
"step": 5000
},
{
"epoch": 0.7920164739426581,
"eval_loss": 0.010719917714595795,
"eval_runtime": 82.5902,
"eval_samples_per_second": 60.54,
"eval_steps_per_second": 1.901,
"eval_sts-dev_pearson_cosine": 0.19565242771314767,
"eval_sts-dev_pearson_dot": 0.18923012649171922,
"eval_sts-dev_pearson_euclidean": 0.2278768429358364,
"eval_sts-dev_pearson_manhattan": 0.22768897126347665,
"eval_sts-dev_pearson_max": 0.2278768429358364,
"eval_sts-dev_spearman_cosine": 0.24240094548214325,
"eval_sts-dev_spearman_dot": 0.21284981986619678,
"eval_sts-dev_spearman_euclidean": 0.24732163285243935,
"eval_sts-dev_spearman_manhattan": 0.24736929658665446,
"eval_sts-dev_spearman_max": 0.24736929658665446,
"step": 5000
},
{
"epoch": 0.8712181213369238,
"grad_norm": 0.004818719811737537,
"learning_rate": 7.181834184122514e-06,
"loss": 0.0041,
"step": 5500
},
{
"epoch": 0.8712181213369238,
"eval_loss": 0.009991911239922047,
"eval_runtime": 82.1637,
"eval_samples_per_second": 60.854,
"eval_steps_per_second": 1.911,
"eval_sts-dev_pearson_cosine": 0.1892693811813182,
"eval_sts-dev_pearson_dot": 0.18290497740650222,
"eval_sts-dev_pearson_euclidean": 0.22463164519842746,
"eval_sts-dev_pearson_manhattan": 0.22460268853676083,
"eval_sts-dev_pearson_max": 0.22463164519842746,
"eval_sts-dev_spearman_cosine": 0.24628027091826607,
"eval_sts-dev_spearman_dot": 0.21127633301239485,
"eval_sts-dev_spearman_euclidean": 0.251238048387475,
"eval_sts-dev_spearman_manhattan": 0.2518403299451181,
"eval_sts-dev_spearman_max": 0.2518403299451181,
"step": 5500
},
{
"epoch": 0.9504197687311896,
"grad_norm": 0.012634661048650742,
"learning_rate": 2.7812004928709737e-06,
"loss": 0.0069,
"step": 6000
},
{
"epoch": 0.9504197687311896,
"eval_loss": 0.009837556630373001,
"eval_runtime": 83.3155,
"eval_samples_per_second": 60.013,
"eval_steps_per_second": 1.884,
"eval_sts-dev_pearson_cosine": 0.19373258731869963,
"eval_sts-dev_pearson_dot": 0.18775862207030505,
"eval_sts-dev_pearson_euclidean": 0.22537635202224982,
"eval_sts-dev_pearson_manhattan": 0.2245827911400446,
"eval_sts-dev_pearson_max": 0.22537635202224982,
"eval_sts-dev_spearman_cosine": 0.24307341815427166,
"eval_sts-dev_spearman_dot": 0.2124049530103558,
"eval_sts-dev_spearman_euclidean": 0.24695143686545143,
"eval_sts-dev_spearman_manhattan": 0.2468102784042943,
"eval_sts-dev_spearman_max": 0.24695143686545143,
"step": 6000
}
],
"logging_steps": 500,
"max_steps": 6313,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|