|
{ |
|
"best_metric": 0.47534212470054626, |
|
"best_model_checkpoint": "qa-code-finetune/checkpoint-168", |
|
"epoch": 6.413793103448276, |
|
"eval_steps": 500, |
|
"global_step": 168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.448275862068966e-06, |
|
"loss": 1.4775, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 1.1041, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.0344827586206897e-05, |
|
"loss": 1.3887, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.3793103448275863e-05, |
|
"loss": 0.8684, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 1.5696, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.0689655172413793e-05, |
|
"loss": 1.3935, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.413793103448276e-05, |
|
"loss": 1.3506, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.7586206896551727e-05, |
|
"loss": 1.4628, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.103448275862069e-05, |
|
"loss": 1.3917, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 1.5045, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.793103448275862e-05, |
|
"loss": 1.4222, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 1.3411, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.482758620689655e-05, |
|
"loss": 1.4104, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.827586206896552e-05, |
|
"loss": 1.2848, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.172413793103449e-05, |
|
"loss": 1.3306, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.517241379310345e-05, |
|
"loss": 1.228, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5.862068965517241e-05, |
|
"loss": 1.1974, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.206896551724138e-05, |
|
"loss": 1.0348, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.551724137931034e-05, |
|
"loss": 1.14, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.896551724137931e-05, |
|
"loss": 1.2986, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.241379310344828e-05, |
|
"loss": 1.29, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.586206896551724e-05, |
|
"loss": 1.0764, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.931034482758621e-05, |
|
"loss": 1.244, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.275862068965517e-05, |
|
"loss": 1.2264, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.001452922821045, |
|
"eval_runtime": 45.5541, |
|
"eval_samples_per_second": 0.549, |
|
"eval_steps_per_second": 0.154, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.620689655172413e-05, |
|
"loss": 1.1878, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.96551724137931e-05, |
|
"loss": 1.1258, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.310344827586207e-05, |
|
"loss": 1.199, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.655172413793105e-05, |
|
"loss": 1.0574, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1888, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00010344827586206898, |
|
"loss": 1.1305, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00010689655172413792, |
|
"loss": 0.978, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001103448275862069, |
|
"loss": 1.1873, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00011379310344827588, |
|
"loss": 1.0899, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00011724137931034482, |
|
"loss": 0.9437, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001206896551724138, |
|
"loss": 1.0896, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00012413793103448277, |
|
"loss": 0.8899, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00012758620689655174, |
|
"loss": 1.0744, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013103448275862068, |
|
"loss": 0.9309, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00013448275862068965, |
|
"loss": 1.0572, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013793103448275863, |
|
"loss": 0.94, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001413793103448276, |
|
"loss": 0.9554, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00014482758620689657, |
|
"loss": 0.863, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00014827586206896554, |
|
"loss": 0.9654, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00015172413793103449, |
|
"loss": 0.9372, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00015517241379310346, |
|
"loss": 0.7731, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00015862068965517243, |
|
"loss": 0.9372, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016206896551724137, |
|
"loss": 0.7921, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016551724137931035, |
|
"loss": 0.7756, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.7165587544441223, |
|
"eval_runtime": 45.4378, |
|
"eval_samples_per_second": 0.55, |
|
"eval_steps_per_second": 0.154, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00016896551724137932, |
|
"loss": 0.8733, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00017241379310344826, |
|
"loss": 0.7803, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00017586206896551723, |
|
"loss": 1.0161, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0001793103448275862, |
|
"loss": 0.8577, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00018275862068965518, |
|
"loss": 0.8049, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00018620689655172415, |
|
"loss": 0.7638, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00018965517241379312, |
|
"loss": 0.7288, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0001931034482758621, |
|
"loss": 0.7548, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00019655172413793104, |
|
"loss": 0.6096, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9043, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0001996168582375479, |
|
"loss": 0.6023, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001992337164750958, |
|
"loss": 0.6247, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00019885057471264367, |
|
"loss": 0.7316, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0001984674329501916, |
|
"loss": 0.6398, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00019808429118773948, |
|
"loss": 0.7006, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00019770114942528738, |
|
"loss": 0.6344, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00019731800766283525, |
|
"loss": 0.7474, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00019693486590038314, |
|
"loss": 0.5874, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00019655172413793104, |
|
"loss": 0.6165, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00019616858237547893, |
|
"loss": 0.7373, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00019578544061302683, |
|
"loss": 0.702, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00019540229885057472, |
|
"loss": 0.7673, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00019501915708812262, |
|
"loss": 0.767, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001946360153256705, |
|
"loss": 0.712, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.5802827477455139, |
|
"eval_runtime": 45.5609, |
|
"eval_samples_per_second": 0.549, |
|
"eval_steps_per_second": 0.154, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0001942528735632184, |
|
"loss": 0.6086, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.00019386973180076628, |
|
"loss": 0.5534, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00019348659003831417, |
|
"loss": 0.7017, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.0001931034482758621, |
|
"loss": 0.7676, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.00019272030651341, |
|
"loss": 0.6932, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.00019233716475095786, |
|
"loss": 0.5797, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00019195402298850575, |
|
"loss": 0.5496, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00019157088122605365, |
|
"loss": 0.5773, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00019118773946360154, |
|
"loss": 0.6998, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00019080459770114944, |
|
"loss": 0.5153, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00019042145593869733, |
|
"loss": 0.5796, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00019003831417624523, |
|
"loss": 0.528, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.00018965517241379312, |
|
"loss": 0.5977, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00018927203065134102, |
|
"loss": 0.5815, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00018888888888888888, |
|
"loss": 0.6144, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00018850574712643678, |
|
"loss": 0.6932, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0001881226053639847, |
|
"loss": 0.6371, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0001877394636015326, |
|
"loss": 0.6041, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00018735632183908046, |
|
"loss": 0.5556, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00018697318007662836, |
|
"loss": 0.5421, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00018659003831417625, |
|
"loss": 0.7292, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.00018620689655172415, |
|
"loss": 0.4931, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00018582375478927202, |
|
"loss": 0.6219, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.00018544061302681994, |
|
"loss": 0.6215, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_loss": 0.5279306173324585, |
|
"eval_runtime": 45.1884, |
|
"eval_samples_per_second": 0.553, |
|
"eval_steps_per_second": 0.155, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00018505747126436783, |
|
"loss": 0.5279, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00018467432950191573, |
|
"loss": 0.6158, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0001842911877394636, |
|
"loss": 0.479, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0001839080459770115, |
|
"loss": 0.6229, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0001835249042145594, |
|
"loss": 0.5331, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00018314176245210728, |
|
"loss": 0.4522, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00018275862068965518, |
|
"loss": 0.349, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00018237547892720307, |
|
"loss": 0.6031, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.00018199233716475097, |
|
"loss": 0.5603, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00018160919540229886, |
|
"loss": 0.5386, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.00018122605363984676, |
|
"loss": 0.5133, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.00018084291187739463, |
|
"loss": 0.5644, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.00018045977011494252, |
|
"loss": 0.5141, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00018007662835249044, |
|
"loss": 0.4475, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00017969348659003834, |
|
"loss": 0.4485, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.0001793103448275862, |
|
"loss": 0.4857, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0001789272030651341, |
|
"loss": 0.7387, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.000178544061302682, |
|
"loss": 0.5216, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0001781609195402299, |
|
"loss": 0.4717, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 0.4892, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.00017739463601532568, |
|
"loss": 0.5037, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.00017701149425287358, |
|
"loss": 0.5465, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00017662835249042147, |
|
"loss": 0.5272, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00017624521072796937, |
|
"loss": 0.5952, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 0.4989665448665619, |
|
"eval_runtime": 45.2351, |
|
"eval_samples_per_second": 0.553, |
|
"eval_steps_per_second": 0.155, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00017586206896551723, |
|
"loss": 0.4193, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.00017547892720306513, |
|
"loss": 0.4954, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00017509578544061302, |
|
"loss": 0.4177, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00017471264367816095, |
|
"loss": 0.3927, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.00017432950191570881, |
|
"loss": 0.3624, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0001739463601532567, |
|
"loss": 0.481, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0001735632183908046, |
|
"loss": 0.4035, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0001731800766283525, |
|
"loss": 0.4724, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0001727969348659004, |
|
"loss": 0.389, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.00017241379310344826, |
|
"loss": 0.5242, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00017203065134099618, |
|
"loss": 0.4476, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00017164750957854408, |
|
"loss": 0.5525, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00017126436781609197, |
|
"loss": 0.4405, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00017088122605363984, |
|
"loss": 0.4422, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00017049808429118774, |
|
"loss": 0.5116, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00017011494252873563, |
|
"loss": 0.3784, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.00016973180076628356, |
|
"loss": 0.5097, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00016934865900383142, |
|
"loss": 0.3885, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00016896551724137932, |
|
"loss": 0.4608, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0001685823754789272, |
|
"loss": 0.4212, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0001681992337164751, |
|
"loss": 0.4852, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.000167816091954023, |
|
"loss": 0.484, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00016743295019157087, |
|
"loss": 0.5694, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.0001670498084291188, |
|
"loss": 0.5575, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.4801803529262543, |
|
"eval_runtime": 45.3478, |
|
"eval_samples_per_second": 0.551, |
|
"eval_steps_per_second": 0.154, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 0.4301, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00016628352490421458, |
|
"loss": 0.4271, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00016590038314176245, |
|
"loss": 0.4016, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00016551724137931035, |
|
"loss": 0.4963, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00016513409961685824, |
|
"loss": 0.4159, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00016475095785440614, |
|
"loss": 0.3818, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00016436781609195403, |
|
"loss": 0.3326, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00016398467432950193, |
|
"loss": 0.3099, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.00016360153256704982, |
|
"loss": 0.3519, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00016321839080459772, |
|
"loss": 0.4866, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.0001628352490421456, |
|
"loss": 0.4286, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00016245210727969348, |
|
"loss": 0.3815, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00016206896551724137, |
|
"loss": 0.4642, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0001616858237547893, |
|
"loss": 0.4076, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0001613026819923372, |
|
"loss": 0.3396, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00016091954022988506, |
|
"loss": 0.2513, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00016053639846743295, |
|
"loss": 0.3305, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00016015325670498085, |
|
"loss": 0.4355, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00015977011494252874, |
|
"loss": 0.4543, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00015938697318007664, |
|
"loss": 0.3778, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00015900383141762453, |
|
"loss": 0.4365, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00015862068965517243, |
|
"loss": 0.4298, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00015823754789272032, |
|
"loss": 0.4427, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00015785440613026822, |
|
"loss": 0.4207, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_loss": 0.47534212470054626, |
|
"eval_runtime": 45.2548, |
|
"eval_samples_per_second": 0.552, |
|
"eval_steps_per_second": 0.155, |
|
"step": 168 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 580, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.731509408595968e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|