|
{ |
|
"best_metric": 0.88762098828324, |
|
"best_model_checkpoint": "test-glue/checkpoint-98176", |
|
"epoch": 4.0, |
|
"global_step": 98176, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.020371577574967405, |
|
"learning_rate": 1.9898142112125166e-05, |
|
"loss": 0.659133544921875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04074315514993481, |
|
"learning_rate": 1.9796284224250328e-05, |
|
"loss": 0.5110537109375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06111473272490222, |
|
"learning_rate": 1.969442633637549e-05, |
|
"loss": 0.485730224609375, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08148631029986962, |
|
"learning_rate": 1.9592568448500654e-05, |
|
"loss": 0.48240771484375, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.10185788787483703, |
|
"learning_rate": 1.9490710560625816e-05, |
|
"loss": 0.46434228515625, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12222946544980444, |
|
"learning_rate": 1.938885267275098e-05, |
|
"loss": 0.449291259765625, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14260104302477183, |
|
"learning_rate": 1.9286994784876142e-05, |
|
"loss": 0.4585693359375, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.16297262059973924, |
|
"learning_rate": 1.9185136897001307e-05, |
|
"loss": 0.44105908203125, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18334419817470665, |
|
"learning_rate": 1.9083279009126468e-05, |
|
"loss": 0.444558837890625, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.20371577574967406, |
|
"learning_rate": 1.898142112125163e-05, |
|
"loss": 0.45100146484375, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22408735332464147, |
|
"learning_rate": 1.8879563233376795e-05, |
|
"loss": 0.448494140625, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.24445893089960888, |
|
"learning_rate": 1.8777705345501956e-05, |
|
"loss": 0.4301044921875, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2648305084745763, |
|
"learning_rate": 1.867584745762712e-05, |
|
"loss": 0.42643359375, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.28520208604954367, |
|
"learning_rate": 1.8573989569752282e-05, |
|
"loss": 0.41756689453125, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3055736636245111, |
|
"learning_rate": 1.8472131681877447e-05, |
|
"loss": 0.4308828125, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3259452411994785, |
|
"learning_rate": 1.837027379400261e-05, |
|
"loss": 0.41683154296875, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.34631681877444587, |
|
"learning_rate": 1.8268415906127773e-05, |
|
"loss": 0.413720703125, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3666883963494133, |
|
"learning_rate": 1.8166558018252935e-05, |
|
"loss": 0.425869140625, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3870599739243807, |
|
"learning_rate": 1.80647001303781e-05, |
|
"loss": 0.42243359375, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.4074315514993481, |
|
"learning_rate": 1.796284224250326e-05, |
|
"loss": 0.415326171875, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.4278031290743155, |
|
"learning_rate": 1.7860984354628423e-05, |
|
"loss": 0.4072890625, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.44817470664928294, |
|
"learning_rate": 1.7759126466753587e-05, |
|
"loss": 0.4062841796875, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4685462842242503, |
|
"learning_rate": 1.765726857887875e-05, |
|
"loss": 0.3890205078125, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.48891786179921776, |
|
"learning_rate": 1.7555410691003914e-05, |
|
"loss": 0.3969296875, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.5092894393741851, |
|
"learning_rate": 1.7453552803129075e-05, |
|
"loss": 0.3952880859375, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.5296610169491526, |
|
"learning_rate": 1.7351694915254237e-05, |
|
"loss": 0.41177734375, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5500325945241199, |
|
"learning_rate": 1.72498370273794e-05, |
|
"loss": 0.3807109375, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5704041720990873, |
|
"learning_rate": 1.7147979139504566e-05, |
|
"loss": 0.403853515625, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5907757496740548, |
|
"learning_rate": 1.7046121251629728e-05, |
|
"loss": 0.411607421875, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.6111473272490222, |
|
"learning_rate": 1.6944263363754893e-05, |
|
"loss": 0.3996162109375, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.6315189048239895, |
|
"learning_rate": 1.6842405475880054e-05, |
|
"loss": 0.3948984375, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.651890482398957, |
|
"learning_rate": 1.6740547588005215e-05, |
|
"loss": 0.389115234375, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6722620599739244, |
|
"learning_rate": 1.663868970013038e-05, |
|
"loss": 0.403466796875, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6926336375488917, |
|
"learning_rate": 1.6536831812255542e-05, |
|
"loss": 0.4059140625, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.7130052151238592, |
|
"learning_rate": 1.6434973924380707e-05, |
|
"loss": 0.3873095703125, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.7333767926988266, |
|
"learning_rate": 1.6333116036505868e-05, |
|
"loss": 0.3887783203125, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.753748370273794, |
|
"learning_rate": 1.623125814863103e-05, |
|
"loss": 0.395041015625, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7741199478487614, |
|
"learning_rate": 1.6129400260756194e-05, |
|
"loss": 0.39855078125, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7944915254237288, |
|
"learning_rate": 1.6027542372881356e-05, |
|
"loss": 0.390134765625, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.8148631029986962, |
|
"learning_rate": 1.592568448500652e-05, |
|
"loss": 0.3717578125, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.8352346805736637, |
|
"learning_rate": 1.5823826597131685e-05, |
|
"loss": 0.387556640625, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.855606258148631, |
|
"learning_rate": 1.5721968709256844e-05, |
|
"loss": 0.389892578125, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8759778357235984, |
|
"learning_rate": 1.562011082138201e-05, |
|
"loss": 0.38671875, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8963494132985659, |
|
"learning_rate": 1.5518252933507173e-05, |
|
"loss": 0.36665234375, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9167209908735332, |
|
"learning_rate": 1.5416395045632335e-05, |
|
"loss": 0.3940859375, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.9370925684485006, |
|
"learning_rate": 1.53145371577575e-05, |
|
"loss": 0.382552734375, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9574641460234681, |
|
"learning_rate": 1.5212679269882663e-05, |
|
"loss": 0.38312109375, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9778357235984355, |
|
"learning_rate": 1.5110821382007822e-05, |
|
"loss": 0.3804765625, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9982073011734028, |
|
"learning_rate": 1.5008963494132987e-05, |
|
"loss": 0.383294921875, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8807947019867549, |
|
"eval_loss": 0.3417690396308899, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 1.0185788787483703, |
|
"learning_rate": 1.490710560625815e-05, |
|
"loss": 0.293595703125, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.0389504563233376, |
|
"learning_rate": 1.4805247718383314e-05, |
|
"loss": 0.29225, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.0593220338983051, |
|
"learning_rate": 1.4703389830508477e-05, |
|
"loss": 0.294130859375, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0796936114732725, |
|
"learning_rate": 1.4601531942633638e-05, |
|
"loss": 0.28494921875, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.1000651890482398, |
|
"learning_rate": 1.4499674054758801e-05, |
|
"loss": 0.28199609375, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.1204367666232073, |
|
"learning_rate": 1.4397816166883964e-05, |
|
"loss": 0.28830078125, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.1408083441981747, |
|
"learning_rate": 1.4295958279009128e-05, |
|
"loss": 0.283265625, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.161179921773142, |
|
"learning_rate": 1.419410039113429e-05, |
|
"loss": 0.284984375, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1815514993481095, |
|
"learning_rate": 1.4092242503259454e-05, |
|
"loss": 0.2741875, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.2019230769230769, |
|
"learning_rate": 1.3990384615384615e-05, |
|
"loss": 0.293828125, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.2222946544980444, |
|
"learning_rate": 1.3888526727509778e-05, |
|
"loss": 0.291970703125, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.2426662320730117, |
|
"learning_rate": 1.3786668839634942e-05, |
|
"loss": 0.2823515625, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.263037809647979, |
|
"learning_rate": 1.3684810951760106e-05, |
|
"loss": 0.2898671875, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2834093872229466, |
|
"learning_rate": 1.358295306388527e-05, |
|
"loss": 0.291859375, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.303780964797914, |
|
"learning_rate": 1.3481095176010431e-05, |
|
"loss": 0.292462890625, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.3241525423728815, |
|
"learning_rate": 1.3379237288135594e-05, |
|
"loss": 0.29555859375, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.3445241199478488, |
|
"learning_rate": 1.3277379400260757e-05, |
|
"loss": 0.288212890625, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.3648956975228161, |
|
"learning_rate": 1.317552151238592e-05, |
|
"loss": 0.294849609375, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.3852672750977835, |
|
"learning_rate": 1.3073663624511084e-05, |
|
"loss": 0.289576171875, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.405638852672751, |
|
"learning_rate": 1.2971805736636247e-05, |
|
"loss": 0.289185546875, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.4260104302477183, |
|
"learning_rate": 1.2869947848761408e-05, |
|
"loss": 0.283068359375, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.4463820078226859, |
|
"learning_rate": 1.2768089960886571e-05, |
|
"loss": 0.28625390625, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.4667535853976532, |
|
"learning_rate": 1.2666232073011735e-05, |
|
"loss": 0.282048828125, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.4871251629726205, |
|
"learning_rate": 1.2564374185136898e-05, |
|
"loss": 0.28896484375, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.5074967405475879, |
|
"learning_rate": 1.2462516297262063e-05, |
|
"loss": 0.276443359375, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.5278683181225554, |
|
"learning_rate": 1.2360658409387226e-05, |
|
"loss": 0.310044921875, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.548239895697523, |
|
"learning_rate": 1.2258800521512385e-05, |
|
"loss": 0.285341796875, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.5686114732724903, |
|
"learning_rate": 1.215694263363755e-05, |
|
"loss": 0.282486328125, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.5889830508474576, |
|
"learning_rate": 1.2055084745762713e-05, |
|
"loss": 0.295353515625, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.609354628422425, |
|
"learning_rate": 1.1953226857887877e-05, |
|
"loss": 0.284724609375, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.6297262059973925, |
|
"learning_rate": 1.185136897001304e-05, |
|
"loss": 0.286890625, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.6500977835723598, |
|
"learning_rate": 1.1749511082138201e-05, |
|
"loss": 0.278595703125, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.6704693611473274, |
|
"learning_rate": 1.1647653194263364e-05, |
|
"loss": 0.30148046875, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.6908409387222947, |
|
"learning_rate": 1.1545795306388527e-05, |
|
"loss": 0.274353515625, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.711212516297262, |
|
"learning_rate": 1.144393741851369e-05, |
|
"loss": 0.29472265625, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.7315840938722293, |
|
"learning_rate": 1.1342079530638854e-05, |
|
"loss": 0.2853359375, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.7519556714471969, |
|
"learning_rate": 1.1240221642764017e-05, |
|
"loss": 0.286759765625, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.7723272490221644, |
|
"learning_rate": 1.1138363754889178e-05, |
|
"loss": 0.28123046875, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.7926988265971318, |
|
"learning_rate": 1.1036505867014341e-05, |
|
"loss": 0.2833515625, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.813070404172099, |
|
"learning_rate": 1.0934647979139506e-05, |
|
"loss": 0.281861328125, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.8334419817470664, |
|
"learning_rate": 1.083279009126467e-05, |
|
"loss": 0.2866171875, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.8538135593220337, |
|
"learning_rate": 1.0730932203389833e-05, |
|
"loss": 0.28151953125, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.8741851368970013, |
|
"learning_rate": 1.0629074315514994e-05, |
|
"loss": 0.285787109375, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.8945567144719688, |
|
"learning_rate": 1.0527216427640157e-05, |
|
"loss": 0.28494921875, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.9149282920469362, |
|
"learning_rate": 1.042535853976532e-05, |
|
"loss": 0.281390625, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.9352998696219035, |
|
"learning_rate": 1.0323500651890483e-05, |
|
"loss": 0.2822734375, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.9556714471968708, |
|
"learning_rate": 1.0221642764015647e-05, |
|
"loss": 0.2885546875, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.9760430247718384, |
|
"learning_rate": 1.011978487614081e-05, |
|
"loss": 0.2759296875, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.996414602346806, |
|
"learning_rate": 1.0017926988265971e-05, |
|
"loss": 0.27166796875, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8865002547121752, |
|
"eval_loss": 0.34813082218170166, |
|
"step": 49088 |
|
}, |
|
{ |
|
"epoch": 2.0167861799217732, |
|
"learning_rate": 9.916069100391134e-06, |
|
"loss": 0.2067265625, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.0371577574967406, |
|
"learning_rate": 9.814211212516298e-06, |
|
"loss": 0.18627734375, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.057529335071708, |
|
"learning_rate": 9.71235332464146e-06, |
|
"loss": 0.20591796875, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.077900912646675, |
|
"learning_rate": 9.610495436766624e-06, |
|
"loss": 0.207765625, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.098272490221643, |
|
"learning_rate": 9.508637548891787e-06, |
|
"loss": 0.200046875, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"learning_rate": 9.40677966101695e-06, |
|
"loss": 0.20955078125, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.1390156453715776, |
|
"learning_rate": 9.304921773142113e-06, |
|
"loss": 0.20408203125, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.159387222946545, |
|
"learning_rate": 9.203063885267276e-06, |
|
"loss": 0.1967578125, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.1797588005215123, |
|
"learning_rate": 9.101205997392438e-06, |
|
"loss": 0.2086171875, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.2001303780964796, |
|
"learning_rate": 8.999348109517601e-06, |
|
"loss": 0.19387890625, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.2205019556714474, |
|
"learning_rate": 8.897490221642766e-06, |
|
"loss": 0.2149140625, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.2408735332464147, |
|
"learning_rate": 8.795632333767927e-06, |
|
"loss": 0.20816015625, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.261245110821382, |
|
"learning_rate": 8.69377444589309e-06, |
|
"loss": 0.1981796875, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.2816166883963493, |
|
"learning_rate": 8.591916558018254e-06, |
|
"loss": 0.20524609375, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.3019882659713167, |
|
"learning_rate": 8.490058670143417e-06, |
|
"loss": 0.20031640625, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.322359843546284, |
|
"learning_rate": 8.38820078226858e-06, |
|
"loss": 0.2170234375, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.3427314211212518, |
|
"learning_rate": 8.286342894393743e-06, |
|
"loss": 0.202625, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.363102998696219, |
|
"learning_rate": 8.184485006518904e-06, |
|
"loss": 0.21112890625, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.3834745762711864, |
|
"learning_rate": 8.08262711864407e-06, |
|
"loss": 0.19984375, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"learning_rate": 7.980769230769232e-06, |
|
"loss": 0.2016015625, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.424217731421121, |
|
"learning_rate": 7.878911342894394e-06, |
|
"loss": 0.20296875, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.444589308996089, |
|
"learning_rate": 7.777053455019557e-06, |
|
"loss": 0.20494140625, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.464960886571056, |
|
"learning_rate": 7.67519556714472e-06, |
|
"loss": 0.19887890625, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.4853324641460235, |
|
"learning_rate": 7.573337679269883e-06, |
|
"loss": 0.2058984375, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.505704041720991, |
|
"learning_rate": 7.4714797913950464e-06, |
|
"loss": 0.2057265625, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.526075619295958, |
|
"learning_rate": 7.369621903520209e-06, |
|
"loss": 0.1991484375, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.5464471968709255, |
|
"learning_rate": 7.267764015645372e-06, |
|
"loss": 0.2016328125, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.5668187744458932, |
|
"learning_rate": 7.165906127770536e-06, |
|
"loss": 0.2006640625, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.5871903520208606, |
|
"learning_rate": 7.064048239895698e-06, |
|
"loss": 0.20136328125, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.607561929595828, |
|
"learning_rate": 6.962190352020861e-06, |
|
"loss": 0.210953125, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.627933507170795, |
|
"learning_rate": 6.8603324641460245e-06, |
|
"loss": 0.20728515625, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.648305084745763, |
|
"learning_rate": 6.758474576271187e-06, |
|
"loss": 0.19809765625, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.6686766623207303, |
|
"learning_rate": 6.65661668839635e-06, |
|
"loss": 0.20791796875, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.6890482398956976, |
|
"learning_rate": 6.554758800521513e-06, |
|
"loss": 0.1968359375, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.709419817470665, |
|
"learning_rate": 6.452900912646675e-06, |
|
"loss": 0.19569140625, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.7297913950456323, |
|
"learning_rate": 6.351043024771839e-06, |
|
"loss": 0.21024609375, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.7501629726205996, |
|
"learning_rate": 6.249185136897002e-06, |
|
"loss": 0.20680859375, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.770534550195567, |
|
"learning_rate": 6.147327249022165e-06, |
|
"loss": 0.206203125, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.7909061277705347, |
|
"learning_rate": 6.045469361147328e-06, |
|
"loss": 0.1981015625, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.811277705345502, |
|
"learning_rate": 5.94361147327249e-06, |
|
"loss": 0.20408984375, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.8316492829204694, |
|
"learning_rate": 5.841753585397653e-06, |
|
"loss": 0.20057421875, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.8520208604954367, |
|
"learning_rate": 5.739895697522817e-06, |
|
"loss": 0.19898046875, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.872392438070404, |
|
"learning_rate": 5.63803780964798e-06, |
|
"loss": 0.19495703125, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.8927640156453718, |
|
"learning_rate": 5.536179921773143e-06, |
|
"loss": 0.1974296875, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.913135593220339, |
|
"learning_rate": 5.434322033898306e-06, |
|
"loss": 0.20246484375, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.9335071707953064, |
|
"learning_rate": 5.332464146023468e-06, |
|
"loss": 0.19712109375, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.9538787483702738, |
|
"learning_rate": 5.230606258148631e-06, |
|
"loss": 0.202109375, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.974250325945241, |
|
"learning_rate": 5.128748370273794e-06, |
|
"loss": 0.2054140625, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.9946219035202084, |
|
"learning_rate": 5.026890482398958e-06, |
|
"loss": 0.202515625, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.884666327050433, |
|
"eval_loss": 0.4447501301765442, |
|
"step": 73632 |
|
}, |
|
{ |
|
"epoch": 3.014993481095176, |
|
"learning_rate": 4.92503259452412e-06, |
|
"loss": 0.13929296875, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.0353650586701435, |
|
"learning_rate": 4.823174706649283e-06, |
|
"loss": 0.14209765625, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 3.055736636245111, |
|
"learning_rate": 4.721316818774446e-06, |
|
"loss": 0.14396875, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.076108213820078, |
|
"learning_rate": 4.6194589308996094e-06, |
|
"loss": 0.13436328125, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 3.0964797913950455, |
|
"learning_rate": 4.5176010430247726e-06, |
|
"loss": 0.13274609375, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.1168513689700132, |
|
"learning_rate": 4.415743155149935e-06, |
|
"loss": 0.1425546875, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 3.1372229465449806, |
|
"learning_rate": 4.313885267275098e-06, |
|
"loss": 0.1465, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.157594524119948, |
|
"learning_rate": 4.212027379400261e-06, |
|
"loss": 0.13299609375, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 3.1779661016949152, |
|
"learning_rate": 4.110169491525424e-06, |
|
"loss": 0.146578125, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.1983376792698825, |
|
"learning_rate": 4.0083116036505874e-06, |
|
"loss": 0.1382421875, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.21870925684485, |
|
"learning_rate": 3.90645371577575e-06, |
|
"loss": 0.14358984375, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.2390808344198176, |
|
"learning_rate": 3.8045958279009133e-06, |
|
"loss": 0.1374140625, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.259452411994785, |
|
"learning_rate": 3.702737940026076e-06, |
|
"loss": 0.14011328125, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.2798239895697523, |
|
"learning_rate": 3.6008800521512388e-06, |
|
"loss": 0.14218359375, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.3001955671447196, |
|
"learning_rate": 3.4990221642764015e-06, |
|
"loss": 0.14026953125, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.320567144719687, |
|
"learning_rate": 3.397164276401565e-06, |
|
"loss": 0.1326171875, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.3409387222946547, |
|
"learning_rate": 3.2953063885267278e-06, |
|
"loss": 0.124484375, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.361310299869622, |
|
"learning_rate": 3.1934485006518905e-06, |
|
"loss": 0.13862890625, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.3816818774445894, |
|
"learning_rate": 3.091590612777054e-06, |
|
"loss": 0.14094140625, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.4020534550195567, |
|
"learning_rate": 2.9897327249022168e-06, |
|
"loss": 0.13262890625, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.422425032594524, |
|
"learning_rate": 2.8878748370273795e-06, |
|
"loss": 0.14966015625, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.4427966101694913, |
|
"learning_rate": 2.7860169491525422e-06, |
|
"loss": 0.136, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.463168187744459, |
|
"learning_rate": 2.684159061277706e-06, |
|
"loss": 0.15666796875, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.4835397653194264, |
|
"learning_rate": 2.5823011734028685e-06, |
|
"loss": 0.12829296875, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.5039113428943938, |
|
"learning_rate": 2.4804432855280312e-06, |
|
"loss": 0.134625, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.524282920469361, |
|
"learning_rate": 2.3785853976531944e-06, |
|
"loss": 0.1383125, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.5446544980443284, |
|
"learning_rate": 2.2767275097783575e-06, |
|
"loss": 0.145578125, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.565026075619296, |
|
"learning_rate": 2.1748696219035202e-06, |
|
"loss": 0.13809765625, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.5853976531942635, |
|
"learning_rate": 2.0730117340286834e-06, |
|
"loss": 0.131578125, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.605769230769231, |
|
"learning_rate": 1.971153846153846e-06, |
|
"loss": 0.128625, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.626140808344198, |
|
"learning_rate": 1.8692959582790093e-06, |
|
"loss": 0.1566328125, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.6465123859191655, |
|
"learning_rate": 1.7674380704041722e-06, |
|
"loss": 0.13289453125, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.666883963494133, |
|
"learning_rate": 1.6655801825293353e-06, |
|
"loss": 0.13489453125, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.6872555410691, |
|
"learning_rate": 1.563722294654498e-06, |
|
"loss": 0.1342890625, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.707627118644068, |
|
"learning_rate": 1.4618644067796612e-06, |
|
"loss": 0.13596875, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.7279986962190352, |
|
"learning_rate": 1.3600065189048241e-06, |
|
"loss": 0.12205859375, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.7483702737940026, |
|
"learning_rate": 1.258148631029987e-06, |
|
"loss": 0.136265625, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.76874185136897, |
|
"learning_rate": 1.15629074315515e-06, |
|
"loss": 0.1251875, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.7891134289439377, |
|
"learning_rate": 1.0544328552803131e-06, |
|
"loss": 0.1440859375, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.809485006518905, |
|
"learning_rate": 9.52574967405476e-07, |
|
"loss": 0.14747265625, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.8298565840938723, |
|
"learning_rate": 8.507170795306389e-07, |
|
"loss": 0.12144921875, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.8502281616688396, |
|
"learning_rate": 7.488591916558019e-07, |
|
"loss": 0.138546875, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.870599739243807, |
|
"learning_rate": 6.470013037809649e-07, |
|
"loss": 0.1290390625, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.8909713168187743, |
|
"learning_rate": 5.451434159061278e-07, |
|
"loss": 0.13651953125, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.9113428943937416, |
|
"learning_rate": 4.432855280312908e-07, |
|
"loss": 0.14065234375, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.9317144719687094, |
|
"learning_rate": 3.4142764015645373e-07, |
|
"loss": 0.129953125, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.9520860495436767, |
|
"learning_rate": 2.395697522816167e-07, |
|
"loss": 0.127765625, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.972457627118644, |
|
"learning_rate": 1.3771186440677968e-07, |
|
"loss": 0.12146875, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.9928292046936114, |
|
"learning_rate": 3.585397653194264e-08, |
|
"loss": 0.132578125, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.88762098828324, |
|
"eval_loss": 0.5551679134368896, |
|
"step": 98176 |
|
} |
|
], |
|
"max_steps": 98176, |
|
"num_train_epochs": 4, |
|
"total_flos": 124136967570323400, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|