{
  "best_metric": 65.65919749869724,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Vietnamese-VTB/checkpoint-14000",
  "epoch": 340.90909090909093,
  "global_step": 15000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.27,
      "learning_rate": 7.840000000000001e-05,
      "loss": 3.3293,
      "step": 100
    },
    {
      "epoch": 4.55,
      "learning_rate": 7.947382550335571e-05,
      "loss": 0.8977,
      "step": 200
    },
    {
      "epoch": 6.82,
      "learning_rate": 7.893691275167786e-05,
      "loss": 0.4112,
      "step": 300
    },
    {
      "epoch": 9.09,
      "learning_rate": 7.840536912751678e-05,
      "loss": 0.2437,
      "step": 400
    },
    {
      "epoch": 11.36,
      "learning_rate": 7.786845637583893e-05,
      "loss": 0.1661,
      "step": 500
    },
    {
      "epoch": 11.36,
      "eval_las": 64.63435817265936,
      "eval_loss": 3.016636848449707,
      "eval_runtime": 5.2499,
      "eval_samples_per_second": 152.385,
      "eval_steps_per_second": 19.048,
      "eval_uas": 73.35417752301547,
      "step": 500
    },
    {
      "epoch": 13.64,
      "learning_rate": 7.733154362416108e-05,
      "loss": 0.1204,
      "step": 600
    },
    {
      "epoch": 15.91,
      "learning_rate": 7.679463087248322e-05,
      "loss": 0.1057,
      "step": 700
    },
    {
      "epoch": 18.18,
      "learning_rate": 7.625771812080537e-05,
      "loss": 0.0868,
      "step": 800
    },
    {
      "epoch": 20.45,
      "learning_rate": 7.572080536912752e-05,
      "loss": 0.0739,
      "step": 900
    },
    {
      "epoch": 22.73,
      "learning_rate": 7.518389261744967e-05,
      "loss": 0.0658,
      "step": 1000
    },
    {
      "epoch": 22.73,
      "eval_las": 64.39986103873545,
      "eval_loss": 3.5961220264434814,
      "eval_runtime": 5.2576,
      "eval_samples_per_second": 152.161,
      "eval_steps_per_second": 19.02,
      "eval_uas": 73.18047594233109,
      "step": 1000
    },
    {
      "epoch": 25.0,
      "learning_rate": 7.464697986577182e-05,
      "loss": 0.0632,
      "step": 1100
    },
    {
      "epoch": 27.27,
      "learning_rate": 7.411006711409397e-05,
      "loss": 0.0521,
      "step": 1200
    },
    {
      "epoch": 29.55,
      "learning_rate": 7.357315436241611e-05,
      "loss": 0.0514,
      "step": 1300
    },
    {
      "epoch": 31.82,
      "learning_rate": 7.303624161073826e-05,
      "loss": 0.052,
      "step": 1400
    },
    {
      "epoch": 34.09,
      "learning_rate": 7.249932885906041e-05,
      "loss": 0.0438,
      "step": 1500
    },
    {
      "epoch": 34.09,
      "eval_las": 64.4172311968039,
      "eval_loss": 3.8819875717163086,
      "eval_runtime": 5.2578,
      "eval_samples_per_second": 152.154,
      "eval_steps_per_second": 19.019,
      "eval_uas": 72.91992357130451,
      "step": 1500
    },
    {
      "epoch": 36.36,
      "learning_rate": 7.196241610738256e-05,
      "loss": 0.0431,
      "step": 1600
    },
    {
      "epoch": 38.64,
      "learning_rate": 7.142550335570471e-05,
      "loss": 0.0391,
      "step": 1700
    },
    {
      "epoch": 40.91,
      "learning_rate": 7.088859060402686e-05,
      "loss": 0.0365,
      "step": 1800
    },
    {
      "epoch": 43.18,
      "learning_rate": 7.0351677852349e-05,
      "loss": 0.0387,
      "step": 1900
    },
    {
      "epoch": 45.45,
      "learning_rate": 6.981476510067114e-05,
      "loss": 0.0326,
      "step": 2000
    },
    {
      "epoch": 45.45,
      "eval_las": 64.50408198714608,
      "eval_loss": 4.396294116973877,
      "eval_runtime": 5.2355,
      "eval_samples_per_second": 152.803,
      "eval_steps_per_second": 19.1,
      "eval_uas": 72.88518325516762,
      "step": 2000
    },
    {
      "epoch": 47.73,
      "learning_rate": 6.927785234899329e-05,
      "loss": 0.0318,
      "step": 2100
    },
    {
      "epoch": 50.0,
      "learning_rate": 6.874630872483222e-05,
      "loss": 0.0287,
      "step": 2200
    },
    {
      "epoch": 52.27,
      "learning_rate": 6.820939597315437e-05,
      "loss": 0.0283,
      "step": 2300
    },
    {
      "epoch": 54.55,
      "learning_rate": 6.767248322147652e-05,
      "loss": 0.0249,
      "step": 2400
    },
    {
      "epoch": 56.82,
      "learning_rate": 6.713557046979866e-05,
      "loss": 0.0291,
      "step": 2500
    },
    {
      "epoch": 56.82,
      "eval_las": 64.75594927913843,
      "eval_loss": 4.8261637687683105,
      "eval_runtime": 5.2311,
      "eval_samples_per_second": 152.931,
      "eval_steps_per_second": 19.116,
      "eval_uas": 73.19784610039952,
      "step": 2500
    },
    {
      "epoch": 59.09,
      "learning_rate": 6.659865771812081e-05,
      "loss": 0.0239,
      "step": 2600
    },
    {
      "epoch": 61.36,
      "learning_rate": 6.606174496644296e-05,
      "loss": 0.0216,
      "step": 2700
    },
    {
      "epoch": 63.64,
      "learning_rate": 6.55248322147651e-05,
      "loss": 0.0233,
      "step": 2800
    },
    {
      "epoch": 65.91,
      "learning_rate": 6.498791946308724e-05,
      "loss": 0.022,
      "step": 2900
    },
    {
      "epoch": 68.18,
      "learning_rate": 6.445100671140939e-05,
      "loss": 0.0234,
      "step": 3000
    },
    {
      "epoch": 68.18,
      "eval_las": 64.61698801459093,
      "eval_loss": 4.922646999359131,
      "eval_runtime": 5.2324,
      "eval_samples_per_second": 152.894,
      "eval_steps_per_second": 19.112,
      "eval_uas": 73.38023276011812,
      "step": 3000
    },
    {
      "epoch": 70.45,
      "learning_rate": 6.391409395973154e-05,
      "loss": 0.0252,
      "step": 3100
    },
    {
      "epoch": 72.73,
      "learning_rate": 6.337718120805369e-05,
      "loss": 0.0195,
      "step": 3200
    },
    {
      "epoch": 75.0,
      "learning_rate": 6.284026845637584e-05,
      "loss": 0.0196,
      "step": 3300
    },
    {
      "epoch": 77.27,
      "learning_rate": 6.230335570469799e-05,
      "loss": 0.0217,
      "step": 3400
    },
    {
      "epoch": 79.55,
      "learning_rate": 6.176644295302013e-05,
      "loss": 0.0156,
      "step": 3500
    },
    {
      "epoch": 79.55,
      "eval_las": 64.50408198714608,
      "eval_loss": 5.57904052734375,
      "eval_runtime": 5.2317,
      "eval_samples_per_second": 152.914,
      "eval_steps_per_second": 19.114,
      "eval_uas": 73.46708355046032,
      "step": 3500
    },
    {
      "epoch": 81.82,
      "learning_rate": 6.122953020134228e-05,
      "loss": 0.0181,
      "step": 3600
    },
    {
      "epoch": 84.09,
      "learning_rate": 6.069261744966444e-05,
      "loss": 0.0175,
      "step": 3700
    },
    {
      "epoch": 86.36,
      "learning_rate": 6.0155704697986585e-05,
      "loss": 0.0134,
      "step": 3800
    },
    {
      "epoch": 88.64,
      "learning_rate": 5.9618791946308734e-05,
      "loss": 0.0191,
      "step": 3900
    },
    {
      "epoch": 90.91,
      "learning_rate": 5.9081879194630875e-05,
      "loss": 0.0157,
      "step": 4000
    },
    {
      "epoch": 90.91,
      "eval_las": 64.5127670661803,
      "eval_loss": 5.583916664123535,
      "eval_runtime": 5.2413,
      "eval_samples_per_second": 152.634,
      "eval_steps_per_second": 19.079,
      "eval_uas": 73.11968038909154,
      "step": 4000
    },
    {
      "epoch": 93.18,
      "learning_rate": 5.854496644295302e-05,
      "loss": 0.012,
      "step": 4100
    },
    {
      "epoch": 95.45,
      "learning_rate": 5.800805369127517e-05,
      "loss": 0.0145,
      "step": 4200
    },
    {
      "epoch": 97.73,
      "learning_rate": 5.747114093959732e-05,
      "loss": 0.0156,
      "step": 4300
    },
    {
      "epoch": 100.0,
      "learning_rate": 5.693422818791947e-05,
      "loss": 0.0119,
      "step": 4400
    },
    {
      "epoch": 102.27,
      "learning_rate": 5.6397315436241616e-05,
      "loss": 0.0115,
      "step": 4500
    },
    {
      "epoch": 102.27,
      "eval_las": 64.7820045162411,
      "eval_loss": 6.002554893493652,
      "eval_runtime": 5.23,
      "eval_samples_per_second": 152.965,
      "eval_steps_per_second": 19.121,
      "eval_uas": 73.13705054715997,
      "step": 4500
    },
    {
      "epoch": 104.55,
      "learning_rate": 5.5860402684563764e-05,
      "loss": 0.0132,
      "step": 4600
    },
    {
      "epoch": 106.82,
      "learning_rate": 5.532348993288591e-05,
      "loss": 0.0136,
      "step": 4700
    },
    {
      "epoch": 109.09,
      "learning_rate": 5.478657718120806e-05,
      "loss": 0.0154,
      "step": 4800
    },
    {
      "epoch": 111.36,
      "learning_rate": 5.424966442953021e-05,
      "loss": 0.0106,
      "step": 4900
    },
    {
      "epoch": 113.64,
      "learning_rate": 5.371275167785236e-05,
      "loss": 0.0129,
      "step": 5000
    },
    {
      "epoch": 113.64,
      "eval_las": 64.47802675004343,
      "eval_loss": 5.8163933753967285,
      "eval_runtime": 5.2362,
      "eval_samples_per_second": 152.783,
      "eval_steps_per_second": 19.098,
      "eval_uas": 73.414973076255,
      "step": 5000
    },
    {
      "epoch": 115.91,
      "learning_rate": 5.31758389261745e-05,
      "loss": 0.0106,
      "step": 5100
    },
    {
      "epoch": 118.18,
      "learning_rate": 5.263892617449665e-05,
      "loss": 0.008,
      "step": 5200
    },
    {
      "epoch": 120.45,
      "learning_rate": 5.2102013422818795e-05,
      "loss": 0.009,
      "step": 5300
    },
    {
      "epoch": 122.73,
      "learning_rate": 5.1565100671140944e-05,
      "loss": 0.0111,
      "step": 5400
    },
    {
      "epoch": 125.0,
      "learning_rate": 5.102818791946309e-05,
      "loss": 0.0094,
      "step": 5500
    },
    {
      "epoch": 125.0,
      "eval_las": 64.67778356783047,
      "eval_loss": 5.964992523193359,
      "eval_runtime": 5.2463,
      "eval_samples_per_second": 152.489,
      "eval_steps_per_second": 19.061,
      "eval_uas": 73.24127149557062,
      "step": 5500
    },
    {
      "epoch": 127.27,
      "learning_rate": 5.049127516778524e-05,
      "loss": 0.0101,
      "step": 5600
    },
    {
      "epoch": 129.55,
      "learning_rate": 4.995436241610739e-05,
      "loss": 0.0086,
      "step": 5700
    },
    {
      "epoch": 131.82,
      "learning_rate": 4.941744966442954e-05,
      "loss": 0.0081,
      "step": 5800
    },
    {
      "epoch": 134.09,
      "learning_rate": 4.8880536912751685e-05,
      "loss": 0.0087,
      "step": 5900
    },
    {
      "epoch": 136.36,
      "learning_rate": 4.834362416107383e-05,
      "loss": 0.0067,
      "step": 6000
    },
    {
      "epoch": 136.36,
      "eval_las": 64.93833593885704,
      "eval_loss": 6.23219108581543,
      "eval_runtime": 5.2405,
      "eval_samples_per_second": 152.658,
      "eval_steps_per_second": 19.082,
      "eval_uas": 73.22390133750217,
      "step": 6000
    },
    {
      "epoch": 138.64,
      "learning_rate": 4.780671140939598e-05,
      "loss": 0.0083,
      "step": 6100
    },
    {
      "epoch": 140.91,
      "learning_rate": 4.726979865771813e-05,
      "loss": 0.0104,
      "step": 6200
    },
    {
      "epoch": 143.18,
      "learning_rate": 4.673288590604027e-05,
      "loss": 0.0056,
      "step": 6300
    },
    {
      "epoch": 145.45,
      "learning_rate": 4.619597315436242e-05,
      "loss": 0.0054,
      "step": 6400
    },
    {
      "epoch": 147.73,
      "learning_rate": 4.565906040268457e-05,
      "loss": 0.0079,
      "step": 6500
    },
    {
      "epoch": 147.73,
      "eval_las": 64.39986103873545,
      "eval_loss": 6.42156982421875,
      "eval_runtime": 5.2298,
      "eval_samples_per_second": 152.969,
      "eval_steps_per_second": 19.121,
      "eval_uas": 72.78096230675699,
      "step": 6500
    },
    {
      "epoch": 150.0,
      "learning_rate": 4.5122147651006716e-05,
      "loss": 0.0065,
      "step": 6600
    },
    {
      "epoch": 152.27,
      "learning_rate": 4.4585234899328864e-05,
      "loss": 0.007,
      "step": 6700
    },
    {
      "epoch": 154.55,
      "learning_rate": 4.404832214765101e-05,
      "loss": 0.0073,
      "step": 6800
    },
    {
      "epoch": 156.82,
      "learning_rate": 4.351140939597316e-05,
      "loss": 0.0044,
      "step": 6900
    },
    {
      "epoch": 159.09,
      "learning_rate": 4.297449664429531e-05,
      "loss": 0.0052,
      "step": 7000
    },
    {
      "epoch": 159.09,
      "eval_las": 64.4172311968039,
      "eval_loss": 6.679661750793457,
      "eval_runtime": 5.2549,
      "eval_samples_per_second": 152.237,
      "eval_steps_per_second": 19.03,
      "eval_uas": 72.98940420357826,
      "step": 7000
    },
    {
      "epoch": 161.36,
      "learning_rate": 4.243758389261746e-05,
      "loss": 0.0061,
      "step": 7100
    },
    {
      "epoch": 163.64,
      "learning_rate": 4.1900671140939605e-05,
      "loss": 0.0089,
      "step": 7200
    },
    {
      "epoch": 165.91,
      "learning_rate": 4.1363758389261754e-05,
      "loss": 0.0046,
      "step": 7300
    },
    {
      "epoch": 168.18,
      "learning_rate": 4.0826845637583895e-05,
      "loss": 0.0052,
      "step": 7400
    },
    {
      "epoch": 170.45,
      "learning_rate": 4.028993288590604e-05,
      "loss": 0.0047,
      "step": 7500
    },
    {
      "epoch": 170.45,
      "eval_las": 64.67778356783047,
      "eval_loss": 6.556234836578369,
      "eval_runtime": 5.2573,
      "eval_samples_per_second": 152.17,
      "eval_steps_per_second": 19.021,
      "eval_uas": 73.36286260204969,
      "step": 7500
    },
    {
      "epoch": 172.73,
      "learning_rate": 3.975302013422819e-05,
      "loss": 0.0047,
      "step": 7600
    },
    {
      "epoch": 175.0,
      "learning_rate": 3.921610738255034e-05,
      "loss": 0.0048,
      "step": 7700
    },
    {
      "epoch": 177.27,
      "learning_rate": 3.867919463087249e-05,
      "loss": 0.0047,
      "step": 7800
    },
    {
      "epoch": 179.55,
      "learning_rate": 3.8142281879194636e-05,
      "loss": 0.0047,
      "step": 7900
    },
    {
      "epoch": 181.82,
      "learning_rate": 3.7605369127516784e-05,
      "loss": 0.005,
      "step": 8000
    },
    {
      "epoch": 181.82,
      "eval_las": 64.84280006948063,
      "eval_loss": 6.436727523803711,
      "eval_runtime": 5.2395,
      "eval_samples_per_second": 152.686,
      "eval_steps_per_second": 19.086,
      "eval_uas": 73.31075212784435,
      "step": 8000
    },
    {
      "epoch": 184.09,
      "learning_rate": 3.706845637583893e-05,
      "loss": 0.0032,
      "step": 8100
    },
    {
      "epoch": 186.36,
      "learning_rate": 3.653154362416108e-05,
      "loss": 0.0044,
      "step": 8200
    },
    {
      "epoch": 188.64,
      "learning_rate": 3.599463087248322e-05,
      "loss": 0.0066,
      "step": 8300
    },
    {
      "epoch": 190.91,
      "learning_rate": 3.545771812080537e-05,
      "loss": 0.0024,
      "step": 8400
    },
    {
      "epoch": 193.18,
      "learning_rate": 3.492080536912752e-05,
      "loss": 0.004,
      "step": 8500
    },
    {
      "epoch": 193.18,
      "eval_las": 64.98176133402815,
      "eval_loss": 6.600097179412842,
      "eval_runtime": 5.2414,
      "eval_samples_per_second": 152.632,
      "eval_steps_per_second": 19.079,
      "eval_uas": 73.71026576341845,
      "step": 8500
    },
    {
      "epoch": 195.45,
      "learning_rate": 3.438389261744967e-05,
      "loss": 0.0021,
      "step": 8600
    },
    {
      "epoch": 197.73,
      "learning_rate": 3.3846979865771815e-05,
      "loss": 0.0035,
      "step": 8700
    },
    {
      "epoch": 200.0,
      "learning_rate": 3.3310067114093964e-05,
      "loss": 0.0033,
      "step": 8800
    },
    {
      "epoch": 202.27,
      "learning_rate": 3.277315436241611e-05,
      "loss": 0.0054,
      "step": 8900
    },
    {
      "epoch": 204.55,
      "learning_rate": 3.223624161073826e-05,
      "loss": 0.0033,
      "step": 9000
    },
    {
      "epoch": 204.55,
      "eval_las": 64.95570609692548,
      "eval_loss": 6.6512603759765625,
      "eval_runtime": 5.2201,
      "eval_samples_per_second": 153.255,
      "eval_steps_per_second": 19.157,
      "eval_uas": 73.4583984714261,
      "step": 9000
    },
    {
      "epoch": 206.82,
      "learning_rate": 3.169932885906041e-05,
      "loss": 0.0044,
      "step": 9100
    },
    {
      "epoch": 209.09,
      "learning_rate": 3.1162416107382557e-05,
      "loss": 0.002,
      "step": 9200
    },
    {
      "epoch": 211.36,
      "learning_rate": 3.0630872483221477e-05,
      "loss": 0.0018,
      "step": 9300
    },
    {
      "epoch": 213.64,
      "learning_rate": 3.0093959731543628e-05,
      "loss": 0.0022,
      "step": 9400
    },
    {
      "epoch": 215.91,
      "learning_rate": 2.9557046979865776e-05,
      "loss": 0.0023,
      "step": 9500
    },
    {
      "epoch": 215.91,
      "eval_las": 64.58224769845405,
      "eval_loss": 7.292083740234375,
      "eval_runtime": 5.2437,
      "eval_samples_per_second": 152.565,
      "eval_steps_per_second": 19.071,
      "eval_uas": 72.99808928261248,
      "step": 9500
    },
    {
      "epoch": 218.18,
      "learning_rate": 2.9020134228187925e-05,
      "loss": 0.0021,
      "step": 9600
    },
    {
      "epoch": 220.45,
      "learning_rate": 2.848322147651007e-05,
      "loss": 0.0025,
      "step": 9700
    },
    {
      "epoch": 222.73,
      "learning_rate": 2.7946308724832218e-05,
      "loss": 0.002,
      "step": 9800
    },
    {
      "epoch": 225.0,
      "learning_rate": 2.7409395973154366e-05,
      "loss": 0.0015,
      "step": 9900
    },
    {
      "epoch": 227.27,
      "learning_rate": 2.6872483221476514e-05,
      "loss": 0.0019,
      "step": 10000
    },
    {
      "epoch": 227.27,
      "eval_las": 65.11203751954143,
      "eval_loss": 7.293988227844238,
      "eval_runtime": 5.243,
      "eval_samples_per_second": 152.584,
      "eval_steps_per_second": 19.073,
      "eval_uas": 73.39760291818655,
      "step": 10000
    },
    {
      "epoch": 229.55,
      "learning_rate": 2.6335570469798663e-05,
      "loss": 0.0014,
      "step": 10100
    },
    {
      "epoch": 231.82,
      "learning_rate": 2.5798657718120804e-05,
      "loss": 0.0014,
      "step": 10200
    },
    {
      "epoch": 234.09,
      "learning_rate": 2.5261744966442952e-05,
      "loss": 0.0024,
      "step": 10300
    },
    {
      "epoch": 236.36,
      "learning_rate": 2.47248322147651e-05,
      "loss": 0.0013,
      "step": 10400
    },
    {
      "epoch": 238.64,
      "learning_rate": 2.4187919463087252e-05,
      "loss": 0.0015,
      "step": 10500
    },
    {
      "epoch": 238.64,
      "eval_las": 65.39864512767066,
      "eval_loss": 7.597438335418701,
      "eval_runtime": 5.2412,
      "eval_samples_per_second": 152.636,
      "eval_steps_per_second": 19.08,
      "eval_uas": 73.75369115858955,
      "step": 10500
    },
    {
      "epoch": 240.91,
      "learning_rate": 2.36510067114094e-05,
      "loss": 0.0015,
      "step": 10600
    },
    {
      "epoch": 243.18,
      "learning_rate": 2.311409395973155e-05,
      "loss": 0.0013,
      "step": 10700
    },
    {
      "epoch": 245.45,
      "learning_rate": 2.257718120805369e-05,
      "loss": 0.0015,
      "step": 10800
    },
    {
      "epoch": 247.73,
      "learning_rate": 2.204026845637584e-05,
      "loss": 0.0017,
      "step": 10900
    },
    {
      "epoch": 250.0,
      "learning_rate": 2.1503355704697987e-05,
      "loss": 0.0021,
      "step": 11000
    },
    {
      "epoch": 250.0,
      "eval_las": 64.95570609692548,
      "eval_loss": 7.345833778381348,
      "eval_runtime": 5.2465,
      "eval_samples_per_second": 152.483,
      "eval_steps_per_second": 19.06,
      "eval_uas": 73.4583984714261,
      "step": 11000
    },
    {
      "epoch": 252.27,
      "learning_rate": 2.0966442953020135e-05,
      "loss": 0.0009,
      "step": 11100
    },
    {
      "epoch": 254.55,
      "learning_rate": 2.0429530201342283e-05,
      "loss": 0.0009,
      "step": 11200
    },
    {
      "epoch": 256.82,
      "learning_rate": 1.989261744966443e-05,
      "loss": 0.0008,
      "step": 11300
    },
    {
      "epoch": 259.09,
      "learning_rate": 1.935570469798658e-05,
      "loss": 0.0011,
      "step": 11400
    },
    {
      "epoch": 261.36,
      "learning_rate": 1.8818791946308724e-05,
      "loss": 0.0024,
      "step": 11500
    },
    {
      "epoch": 261.36,
      "eval_las": 65.38996004863644,
      "eval_loss": 7.281766414642334,
      "eval_runtime": 5.2427,
      "eval_samples_per_second": 152.593,
      "eval_steps_per_second": 19.074,
      "eval_uas": 73.80580163279485,
      "step": 11500
    },
    {
      "epoch": 263.64,
      "learning_rate": 1.8281879194630873e-05,
      "loss": 0.0012,
      "step": 11600
    },
    {
      "epoch": 265.91,
      "learning_rate": 1.774496644295302e-05,
      "loss": 0.0018,
      "step": 11700
    },
    {
      "epoch": 268.18,
      "learning_rate": 1.720805369127517e-05,
      "loss": 0.0011,
      "step": 11800
    },
    {
      "epoch": 270.45,
      "learning_rate": 1.6671140939597317e-05,
      "loss": 0.0008,
      "step": 11900
    },
    {
      "epoch": 272.73,
      "learning_rate": 1.6134228187919466e-05,
      "loss": 0.0033,
      "step": 12000
    },
    {
      "epoch": 272.73,
      "eval_las": 65.47681083897864,
      "eval_loss": 7.280235767364502,
      "eval_runtime": 5.2413,
      "eval_samples_per_second": 152.633,
      "eval_steps_per_second": 19.079,
      "eval_uas": 73.65815528921314,
      "step": 12000
    },
    {
      "epoch": 275.0,
      "learning_rate": 1.559731543624161e-05,
      "loss": 0.0007,
      "step": 12100
    },
    {
      "epoch": 277.27,
      "learning_rate": 1.506040268456376e-05,
      "loss": 0.0003,
      "step": 12200
    },
    {
      "epoch": 279.55,
      "learning_rate": 1.4523489932885909e-05,
      "loss": 0.0008,
      "step": 12300
    },
    {
      "epoch": 281.82,
      "learning_rate": 1.3986577181208053e-05,
      "loss": 0.0009,
      "step": 12400
    },
    {
      "epoch": 284.09,
      "learning_rate": 1.3449664429530202e-05,
      "loss": 0.0005,
      "step": 12500
    },
    {
      "epoch": 284.09,
      "eval_las": 65.32916449539691,
      "eval_loss": 7.4806389808654785,
      "eval_runtime": 5.2418,
      "eval_samples_per_second": 152.62,
      "eval_steps_per_second": 19.078,
      "eval_uas": 73.71026576341845,
      "step": 12500
    },
    {
      "epoch": 286.36,
      "learning_rate": 1.2912751677852352e-05,
      "loss": 0.0005,
      "step": 12600
    },
    {
      "epoch": 288.64,
      "learning_rate": 1.2375838926174497e-05,
      "loss": 0.0004,
      "step": 12700
    },
    {
      "epoch": 290.91,
      "learning_rate": 1.1838926174496645e-05,
      "loss": 0.0005,
      "step": 12800
    },
    {
      "epoch": 293.18,
      "learning_rate": 1.1302013422818795e-05,
      "loss": 0.0006,
      "step": 12900
    },
    {
      "epoch": 295.45,
      "learning_rate": 1.076510067114094e-05,
      "loss": 0.0005,
      "step": 13000
    },
    {
      "epoch": 295.45,
      "eval_las": 65.25968386312316,
      "eval_loss": 7.441517353057861,
      "eval_runtime": 5.2481,
      "eval_samples_per_second": 152.435,
      "eval_steps_per_second": 19.054,
      "eval_uas": 73.53656418273407,
      "step": 13000
    },
    {
      "epoch": 297.73,
      "learning_rate": 1.0228187919463088e-05,
      "loss": 0.0005,
      "step": 13100
    },
    {
      "epoch": 300.0,
      "learning_rate": 9.691275167785236e-06,
      "loss": 0.0002,
      "step": 13200
    },
    {
      "epoch": 302.27,
      "learning_rate": 9.154362416107383e-06,
      "loss": 0.0002,
      "step": 13300
    },
    {
      "epoch": 304.55,
      "learning_rate": 8.617449664429531e-06,
      "loss": 0.0003,
      "step": 13400
    },
    {
      "epoch": 306.82,
      "learning_rate": 8.080536912751679e-06,
      "loss": 0.0003,
      "step": 13500
    },
    {
      "epoch": 306.82,
      "eval_las": 65.45075560187598,
      "eval_loss": 7.729467868804932,
      "eval_runtime": 5.2488,
      "eval_samples_per_second": 152.416,
      "eval_steps_per_second": 19.052,
      "eval_uas": 73.65815528921314,
      "step": 13500
    },
    {
      "epoch": 309.09,
      "learning_rate": 7.543624161073826e-06,
      "loss": 0.0002,
      "step": 13600
    },
    {
      "epoch": 311.36,
      "learning_rate": 7.006711409395974e-06,
      "loss": 0.0003,
      "step": 13700
    },
    {
      "epoch": 313.64,
      "learning_rate": 6.469798657718121e-06,
      "loss": 0.0003,
      "step": 13800
    },
    {
      "epoch": 315.91,
      "learning_rate": 5.932885906040269e-06,
      "loss": 0.0006,
      "step": 13900
    },
    {
      "epoch": 318.18,
      "learning_rate": 5.395973154362416e-06,
      "loss": 0.0002,
      "step": 14000
    },
    {
      "epoch": 318.18,
      "eval_las": 65.65919749869724,
      "eval_loss": 7.587600231170654,
      "eval_runtime": 5.2373,
      "eval_samples_per_second": 152.751,
      "eval_steps_per_second": 19.094,
      "eval_uas": 73.95344797637658,
      "step": 14000
    },
    {
      "epoch": 320.45,
      "learning_rate": 4.859060402684564e-06,
      "loss": 0.0002,
      "step": 14100
    },
    {
      "epoch": 322.73,
      "learning_rate": 4.322147651006712e-06,
      "loss": 0.0004,
      "step": 14200
    },
    {
      "epoch": 325.0,
      "learning_rate": 3.7852348993288595e-06,
      "loss": 0.0003,
      "step": 14300
    },
    {
      "epoch": 327.27,
      "learning_rate": 3.248322147651007e-06,
      "loss": 0.0002,
      "step": 14400
    },
    {
      "epoch": 329.55,
      "learning_rate": 2.7114093959731548e-06,
      "loss": 0.0004,
      "step": 14500
    },
    {
      "epoch": 329.55,
      "eval_las": 65.58971686642349,
      "eval_loss": 7.770998477935791,
      "eval_runtime": 5.2451,
      "eval_samples_per_second": 152.522,
      "eval_steps_per_second": 19.065,
      "eval_uas": 73.80580163279485,
      "step": 14500
    },
    {
      "epoch": 331.82,
      "learning_rate": 2.174496644295302e-06,
      "loss": 0.0002,
      "step": 14600
    },
    {
      "epoch": 334.09,
      "learning_rate": 1.6375838926174498e-06,
      "loss": 0.0002,
      "step": 14700
    },
    {
      "epoch": 336.36,
      "learning_rate": 1.1006711409395974e-06,
      "loss": 0.0002,
      "step": 14800
    },
    {
      "epoch": 338.64,
      "learning_rate": 5.63758389261745e-07,
      "loss": 0.0002,
      "step": 14900
    },
    {
      "epoch": 340.91,
      "learning_rate": 2.6845637583892618e-08,
      "loss": 0.0002,
      "step": 15000
    },
    {
      "epoch": 340.91,
      "eval_las": 65.53760639221818,
      "eval_loss": 7.7758870124816895,
      "eval_runtime": 5.264,
      "eval_samples_per_second": 151.977,
      "eval_steps_per_second": 18.997,
      "eval_uas": 73.83185686989752,
      "step": 15000
    },
    {
      "epoch": 340.91,
      "step": 15000,
      "total_flos": 7.969472217071616e+16,
      "train_loss": 0.04560325266600897,
      "train_runtime": 8075.9674,
      "train_samples_per_second": 59.436,
      "train_steps_per_second": 1.857
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 341,
  "total_flos": 7.969472217071616e+16,
  "trial_name": null,
  "trial_params": null
}