|
{ |
|
"best_metric": 0.8634598271995685, |
|
"best_model_checkpoint": "result/my-unsup-simcse-bert-base-uncased", |
|
"epoch": 1.0, |
|
"global_step": 8909, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"eval_avg_sts": 0.8421121351950032, |
|
"eval_sickr_spearman": 0.820764443190438, |
|
"eval_stsb_spearman": 0.8634598271995685, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_avg_sts": 0.8272876260858589, |
|
"eval_sickr_spearman": 0.8016297649684213, |
|
"eval_stsb_spearman": 0.8529454872032965, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_avg_sts": 0.8349849313583235, |
|
"eval_sickr_spearman": 0.8131009368626829, |
|
"eval_stsb_spearman": 0.8568689258539641, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.831630935009541e-05, |
|
"loss": 0.0001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.8324840345106272, |
|
"eval_sickr_spearman": 0.8115295033169321, |
|
"eval_stsb_spearman": 0.8534385657043223, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_avg_sts": 0.8342309434678776, |
|
"eval_sickr_spearman": 0.8101102323005372, |
|
"eval_stsb_spearman": 0.858351654635218, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_avg_sts": 0.8358817878428014, |
|
"eval_sickr_spearman": 0.8152456216040973, |
|
"eval_stsb_spearman": 0.8565179540815056, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_avg_sts": 0.8337641137899596, |
|
"eval_sickr_spearman": 0.8073721233008548, |
|
"eval_stsb_spearman": 0.8601561042790645, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.663261870019082e-05, |
|
"loss": 0.0001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_avg_sts": 0.8318439410449322, |
|
"eval_sickr_spearman": 0.811522586838325, |
|
"eval_stsb_spearman": 0.8521652952515394, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_avg_sts": 0.8304909909975411, |
|
"eval_sickr_spearman": 0.8131338381671679, |
|
"eval_stsb_spearman": 0.8478481438279143, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_avg_sts": 0.829039779377259, |
|
"eval_sickr_spearman": 0.8045460213233376, |
|
"eval_stsb_spearman": 0.8535335374311805, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_avg_sts": 0.8307787364299535, |
|
"eval_sickr_spearman": 0.8137019980660793, |
|
"eval_stsb_spearman": 0.8478554747938275, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4948928050286226e-05, |
|
"loss": 0.0001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_avg_sts": 0.831658365337354, |
|
"eval_sickr_spearman": 0.8159185373352453, |
|
"eval_stsb_spearman": 0.8473981933394628, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.8294868187051994, |
|
"eval_sickr_spearman": 0.8144402841262847, |
|
"eval_stsb_spearman": 0.8445333532841142, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_avg_sts": 0.8268186287541337, |
|
"eval_sickr_spearman": 0.8082519570169981, |
|
"eval_stsb_spearman": 0.8453853004912691, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_avg_sts": 0.8313195396147719, |
|
"eval_sickr_spearman": 0.8177095210456699, |
|
"eval_stsb_spearman": 0.844929558183874, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.3265237400381636e-05, |
|
"loss": 0.0001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_avg_sts": 0.8266715243566689, |
|
"eval_sickr_spearman": 0.8150033527284436, |
|
"eval_stsb_spearman": 0.8383396959848942, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.8293624623711227, |
|
"eval_sickr_spearman": 0.8175795488851783, |
|
"eval_stsb_spearman": 0.841145375857067, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_avg_sts": 0.8322036045569485, |
|
"eval_sickr_spearman": 0.8175970802372032, |
|
"eval_stsb_spearman": 0.8468101288766937, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_avg_sts": 0.8314845597378098, |
|
"eval_sickr_spearman": 0.8156567678324077, |
|
"eval_stsb_spearman": 0.8473123516432118, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1581546750477045e-05, |
|
"loss": 0.0001, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_avg_sts": 0.8298162063992072, |
|
"eval_sickr_spearman": 0.8121817176433601, |
|
"eval_stsb_spearman": 0.8474506951550543, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_avg_sts": 0.8288600170060807, |
|
"eval_sickr_spearman": 0.8109708535761053, |
|
"eval_stsb_spearman": 0.8467491804360561, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_avg_sts": 0.8307741487193253, |
|
"eval_sickr_spearman": 0.8109006321058029, |
|
"eval_stsb_spearman": 0.8506476653328477, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_avg_sts": 0.8299073386983135, |
|
"eval_sickr_spearman": 0.8106493814141802, |
|
"eval_stsb_spearman": 0.8491652959824467, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9897856100572458e-05, |
|
"loss": 0.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_avg_sts": 0.82947527087573, |
|
"eval_sickr_spearman": 0.808709645382602, |
|
"eval_stsb_spearman": 0.8502408963688581, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_avg_sts": 0.8247286673217802, |
|
"eval_sickr_spearman": 0.8044403048690724, |
|
"eval_stsb_spearman": 0.8450170297744881, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_avg_sts": 0.8219332102471997, |
|
"eval_sickr_spearman": 0.806001651883521, |
|
"eval_stsb_spearman": 0.8378647686108784, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_avg_sts": 0.8111430899888176, |
|
"eval_sickr_spearman": 0.7991139439061943, |
|
"eval_stsb_spearman": 0.8231722360714409, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8214165450667864e-05, |
|
"loss": 0.0001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_avg_sts": 0.807692571896139, |
|
"eval_sickr_spearman": 0.7927039052326695, |
|
"eval_stsb_spearman": 0.8226812385596084, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_avg_sts": 0.8135397552735281, |
|
"eval_sickr_spearman": 0.7942567027110624, |
|
"eval_stsb_spearman": 0.8328228078359937, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_avg_sts": 0.8047796899452875, |
|
"eval_sickr_spearman": 0.7930084224157872, |
|
"eval_stsb_spearman": 0.8165509574747878, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_avg_sts": 0.8132531030215364, |
|
"eval_sickr_spearman": 0.7956294796212651, |
|
"eval_stsb_spearman": 0.8308767264218079, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6530474800763274e-05, |
|
"loss": 0.0001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_avg_sts": 0.8062306897971802, |
|
"eval_sickr_spearman": 0.7966211297415562, |
|
"eval_stsb_spearman": 0.8158402498528041, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.8163206710413518, |
|
"eval_sickr_spearman": 0.80029277122879, |
|
"eval_stsb_spearman": 0.8323485708539136, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_avg_sts": 0.8165420284081878, |
|
"eval_sickr_spearman": 0.8011630467557471, |
|
"eval_stsb_spearman": 0.8319210100606285, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_avg_sts": 0.8187419551614288, |
|
"eval_sickr_spearman": 0.8018022926847871, |
|
"eval_stsb_spearman": 0.8356816176380705, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4846784150858683e-05, |
|
"loss": 0.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_avg_sts": 0.8195743034210232, |
|
"eval_sickr_spearman": 0.8021342836579273, |
|
"eval_stsb_spearman": 0.8370143231841193, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_avg_sts": 0.8193325419945161, |
|
"eval_sickr_spearman": 0.8013091573663218, |
|
"eval_stsb_spearman": 0.8373559266227103, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_avg_sts": 0.8183510820774517, |
|
"eval_sickr_spearman": 0.7999737966841394, |
|
"eval_stsb_spearman": 0.836728367470764, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_avg_sts": 0.8168697079122389, |
|
"eval_sickr_spearman": 0.7952836076598091, |
|
"eval_stsb_spearman": 0.8384558081646686, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3163093500954093e-05, |
|
"loss": 0.0001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_avg_sts": 0.8170635469307033, |
|
"eval_sickr_spearman": 0.796297256024559, |
|
"eval_stsb_spearman": 0.8378298378368476, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.8188600980624421, |
|
"eval_sickr_spearman": 0.7980452999302978, |
|
"eval_stsb_spearman": 0.8396748961945864, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_avg_sts": 0.8179261372960408, |
|
"eval_sickr_spearman": 0.7982228709123145, |
|
"eval_stsb_spearman": 0.8376294036797671, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_avg_sts": 0.8152302038847802, |
|
"eval_sickr_spearman": 0.8007572319796964, |
|
"eval_stsb_spearman": 0.829703175789864, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.14794028510495e-05, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_avg_sts": 0.8173976618782237, |
|
"eval_sickr_spearman": 0.8015423483638039, |
|
"eval_stsb_spearman": 0.8332529753926434, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_avg_sts": 0.8175197579562594, |
|
"eval_sickr_spearman": 0.8017781330407636, |
|
"eval_stsb_spearman": 0.8332613828717551, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_avg_sts": 0.8155439357729999, |
|
"eval_sickr_spearman": 0.8033175778729565, |
|
"eval_stsb_spearman": 0.8277702936730432, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_avg_sts": 0.816245667230588, |
|
"eval_sickr_spearman": 0.8031514863241836, |
|
"eval_stsb_spearman": 0.8293398481369924, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.79571220114491e-06, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_avg_sts": 0.8173247407144557, |
|
"eval_sickr_spearman": 0.802800715190381, |
|
"eval_stsb_spearman": 0.8318487662385303, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_avg_sts": 0.8175691152062283, |
|
"eval_sickr_spearman": 0.8076647768019173, |
|
"eval_stsb_spearman": 0.8274734536105394, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.8185213107446343, |
|
"eval_sickr_spearman": 0.805431666802755, |
|
"eval_stsb_spearman": 0.8316109546865137, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_avg_sts": 0.8186800157417145, |
|
"eval_sickr_spearman": 0.8050108663230556, |
|
"eval_stsb_spearman": 0.8323491651603734, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.112021551240318e-06, |
|
"loss": 0.0001, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_avg_sts": 0.8188678392487967, |
|
"eval_sickr_spearman": 0.8050220575696909, |
|
"eval_stsb_spearman": 0.8327136209279026, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_avg_sts": 0.8176842353525476, |
|
"eval_sickr_spearman": 0.805601264621933, |
|
"eval_stsb_spearman": 0.8297672060831622, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_avg_sts": 0.8176889108605423, |
|
"eval_sickr_spearman": 0.8056947811764331, |
|
"eval_stsb_spearman": 0.8296830405446516, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_avg_sts": 0.8196884531122979, |
|
"eval_sickr_spearman": 0.8050268606798346, |
|
"eval_stsb_spearman": 0.8343500455447611, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.428330901335728e-06, |
|
"loss": 0.0001, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_avg_sts": 0.8202633038048985, |
|
"eval_sickr_spearman": 0.8042502938317833, |
|
"eval_stsb_spearman": 0.8362763137780139, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_avg_sts": 0.819155219728974, |
|
"eval_sickr_spearman": 0.8032414485771772, |
|
"eval_stsb_spearman": 0.8350689908807707, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_avg_sts": 0.8185946178321757, |
|
"eval_sickr_spearman": 0.8011783206460045, |
|
"eval_stsb_spearman": 0.836010915018347, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.8201412949742906, |
|
"eval_sickr_spearman": 0.8025524424270474, |
|
"eval_stsb_spearman": 0.8377301475215336, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.744640251431137e-06, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_avg_sts": 0.8212551171670207, |
|
"eval_sickr_spearman": 0.8034978385966538, |
|
"eval_stsb_spearman": 0.8390123957373876, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.818850668226643, |
|
"eval_sickr_spearman": 0.8027982175731062, |
|
"eval_stsb_spearman": 0.8349031188801797, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_avg_sts": 0.8184223281084055, |
|
"eval_sickr_spearman": 0.8041640299736004, |
|
"eval_stsb_spearman": 0.8326806262432108, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.818812249944629, |
|
"eval_sickr_spearman": 0.8036731040858015, |
|
"eval_stsb_spearman": 0.8339513958034565, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.0609496015265466e-06, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_avg_sts": 0.8204302396512207, |
|
"eval_sickr_spearman": 0.8052288794924833, |
|
"eval_stsb_spearman": 0.8356315998099582, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_avg_sts": 0.8209129013628449, |
|
"eval_sickr_spearman": 0.8057768663287909, |
|
"eval_stsb_spearman": 0.8360489363968989, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.8191227480526566, |
|
"eval_sickr_spearman": 0.8051447290027637, |
|
"eval_stsb_spearman": 0.8331007671025495, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_avg_sts": 0.8188352393246607, |
|
"eval_sickr_spearman": 0.804106104465266, |
|
"eval_stsb_spearman": 0.8335643741840555, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3772589516219555e-06, |
|
"loss": 0.0001, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_avg_sts": 0.8199749779902297, |
|
"eval_sickr_spearman": 0.8036332863027092, |
|
"eval_stsb_spearman": 0.8363166696777501, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_avg_sts": 0.8199803565006372, |
|
"eval_sickr_spearman": 0.8035859756677927, |
|
"eval_stsb_spearman": 0.8363747373334816, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.8201168764945796, |
|
"eval_sickr_spearman": 0.803825650863969, |
|
"eval_stsb_spearman": 0.8364081021251903, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_avg_sts": 0.820079455438296, |
|
"eval_sickr_spearman": 0.8038641237762209, |
|
"eval_stsb_spearman": 0.8362947871003711, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 8909, |
|
"train_runtime": 4791.076, |
|
"train_samples_per_second": 1.859 |
|
} |
|
], |
|
"max_steps": 8909, |
|
"num_train_epochs": 1, |
|
"total_flos": 24044429181321216, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|