|
{ |
|
"best_metric": 91.75966152710339, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Hindi-HDTB/checkpoint-6500", |
|
"epoch": 21.634615384615383, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.840000000000001e-05, |
|
"loss": 2.8321, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.947382550335571e-05, |
|
"loss": 0.7588, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.893691275167786e-05, |
|
"loss": 0.607, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.840000000000001e-05, |
|
"loss": 0.5116, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.786308724832216e-05, |
|
"loss": 0.3944, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_las": 89.23531249112644, |
|
"eval_loss": 0.415132611989975, |
|
"eval_runtime": 11.8077, |
|
"eval_samples_per_second": 140.501, |
|
"eval_steps_per_second": 17.616, |
|
"eval_uas": 93.45202601016554, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.73261744966443e-05, |
|
"loss": 0.3888, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.678926174496645e-05, |
|
"loss": 0.3689, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.62523489932886e-05, |
|
"loss": 0.3628, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 7.571543624161075e-05, |
|
"loss": 0.291, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.51785234899329e-05, |
|
"loss": 0.265, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_las": 90.46199278757419, |
|
"eval_loss": 0.37675201892852783, |
|
"eval_runtime": 11.7912, |
|
"eval_samples_per_second": 140.698, |
|
"eval_steps_per_second": 17.64, |
|
"eval_uas": 94.2726524121873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.464161073825505e-05, |
|
"loss": 0.2676, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.410469798657718e-05, |
|
"loss": 0.2719, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.356778523489933e-05, |
|
"loss": 0.2325, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 7.303087248322148e-05, |
|
"loss": 0.2006, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 7.249395973154363e-05, |
|
"loss": 0.2103, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_las": 91.04409802084221, |
|
"eval_loss": 0.3845142126083374, |
|
"eval_runtime": 11.7995, |
|
"eval_samples_per_second": 140.6, |
|
"eval_steps_per_second": 17.628, |
|
"eval_uas": 94.56512479768294, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.195704697986577e-05, |
|
"loss": 0.2208, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 7.142013422818792e-05, |
|
"loss": 0.1948, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 7.088322147651007e-05, |
|
"loss": 0.1553, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.034630872483222e-05, |
|
"loss": 0.1593, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 6.980939597315437e-05, |
|
"loss": 0.1655, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_las": 91.03557940767243, |
|
"eval_loss": 0.3911457359790802, |
|
"eval_runtime": 11.7868, |
|
"eval_samples_per_second": 140.751, |
|
"eval_steps_per_second": 17.647, |
|
"eval_uas": 94.65882954255048, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 6.927248322147651e-05, |
|
"loss": 0.1562, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 6.873557046979866e-05, |
|
"loss": 0.1157, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 6.819865771812081e-05, |
|
"loss": 0.1221, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 6.766174496644296e-05, |
|
"loss": 0.1336, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 6.712483221476511e-05, |
|
"loss": 0.1319, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_las": 90.95323281369792, |
|
"eval_loss": 0.44331711530685425, |
|
"eval_runtime": 11.7942, |
|
"eval_samples_per_second": 140.663, |
|
"eval_steps_per_second": 17.636, |
|
"eval_uas": 94.61055740125508, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 6.658791946308726e-05, |
|
"loss": 0.0893, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 6.60510067114094e-05, |
|
"loss": 0.0981, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 6.551409395973155e-05, |
|
"loss": 0.099, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 6.49771812080537e-05, |
|
"loss": 0.1028, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 6.444026845637585e-05, |
|
"loss": 0.0774, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_las": 91.02138171905614, |
|
"eval_loss": 0.534425675868988, |
|
"eval_runtime": 11.7987, |
|
"eval_samples_per_second": 140.609, |
|
"eval_steps_per_second": 17.629, |
|
"eval_uas": 94.7184598347389, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 6.3903355704698e-05, |
|
"loss": 0.0774, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 6.336644295302015e-05, |
|
"loss": 0.0815, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 6.28295302013423e-05, |
|
"loss": 0.0855, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 6.229261744966444e-05, |
|
"loss": 0.0633, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 6.175570469798658e-05, |
|
"loss": 0.0655, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_las": 91.10088877530738, |
|
"eval_loss": 0.5579993724822998, |
|
"eval_runtime": 11.8126, |
|
"eval_samples_per_second": 140.444, |
|
"eval_steps_per_second": 17.608, |
|
"eval_uas": 94.6304341653179, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 6.121879194630873e-05, |
|
"loss": 0.0654, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 6.068187919463087e-05, |
|
"loss": 0.0692, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 6.014496644295302e-05, |
|
"loss": 0.0608, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 5.9613422818791955e-05, |
|
"loss": 0.0529, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 5.90765100671141e-05, |
|
"loss": 0.0568, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_las": 91.29397734048896, |
|
"eval_loss": 0.5721077919006348, |
|
"eval_runtime": 11.8178, |
|
"eval_samples_per_second": 140.382, |
|
"eval_steps_per_second": 17.601, |
|
"eval_uas": 94.87179487179486, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 5.853959731543625e-05, |
|
"loss": 0.0554, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.80026845637584e-05, |
|
"loss": 0.0517, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 5.7465771812080534e-05, |
|
"loss": 0.0441, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 5.692885906040268e-05, |
|
"loss": 0.0471, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 5.639194630872483e-05, |
|
"loss": 0.0462, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_las": 91.28261918959593, |
|
"eval_loss": 0.6217162609100342, |
|
"eval_runtime": 11.7974, |
|
"eval_samples_per_second": 140.624, |
|
"eval_steps_per_second": 17.631, |
|
"eval_uas": 94.88599256041117, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 5.5855033557046986e-05, |
|
"loss": 0.0483, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 5.5318120805369134e-05, |
|
"loss": 0.0393, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 5.478120805369128e-05, |
|
"loss": 0.0402, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 5.424429530201343e-05, |
|
"loss": 0.0424, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 5.370738255033558e-05, |
|
"loss": 0.0418, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_las": 91.32237271772155, |
|
"eval_loss": 0.6571192145347595, |
|
"eval_runtime": 11.8066, |
|
"eval_samples_per_second": 140.515, |
|
"eval_steps_per_second": 17.617, |
|
"eval_uas": 94.87747394724138, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 5.317046979865773e-05, |
|
"loss": 0.0339, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 5.2633557046979875e-05, |
|
"loss": 0.0361, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 5.2096644295302024e-05, |
|
"loss": 0.0347, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 5.155973154362417e-05, |
|
"loss": 0.0374, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 5.1022818791946307e-05, |
|
"loss": 0.0304, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"eval_las": 91.43879376437516, |
|
"eval_loss": 0.6834462881088257, |
|
"eval_runtime": 11.776, |
|
"eval_samples_per_second": 140.879, |
|
"eval_steps_per_second": 17.663, |
|
"eval_uas": 95.01661129568106, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 5.0485906040268455e-05, |
|
"loss": 0.0299, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 4.99489932885906e-05, |
|
"loss": 0.0335, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 4.941208053691275e-05, |
|
"loss": 0.0331, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 4.88751677852349e-05, |
|
"loss": 0.026, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 4.833825503355705e-05, |
|
"loss": 0.0255, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_las": 91.52965897151944, |
|
"eval_loss": 0.7086959481239319, |
|
"eval_runtime": 11.7728, |
|
"eval_samples_per_second": 140.918, |
|
"eval_steps_per_second": 17.668, |
|
"eval_uas": 95.10179742737883, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 4.7801342281879196e-05, |
|
"loss": 0.0273, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 4.726442953020135e-05, |
|
"loss": 0.0286, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 4.67275167785235e-05, |
|
"loss": 0.0219, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 4.619060402684565e-05, |
|
"loss": 0.0216, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 4.565906040268457e-05, |
|
"loss": 0.0254, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"eval_las": 91.75966152710339, |
|
"eval_loss": 0.7071714401245117, |
|
"eval_runtime": 11.7877, |
|
"eval_samples_per_second": 140.74, |
|
"eval_steps_per_second": 17.646, |
|
"eval_uas": 95.23525570037198, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 4.5122147651006716e-05, |
|
"loss": 0.0242, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 4.4585234899328864e-05, |
|
"loss": 0.023, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 4.404832214765101e-05, |
|
"loss": 0.0222, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 4.351140939597316e-05, |
|
"loss": 0.0198, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 4.297449664429531e-05, |
|
"loss": 0.0211, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"eval_las": 91.60632649004742, |
|
"eval_loss": 0.7650447487831116, |
|
"eval_runtime": 11.8115, |
|
"eval_samples_per_second": 140.456, |
|
"eval_steps_per_second": 17.61, |
|
"eval_uas": 95.07908112559275, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 4.243758389261746e-05, |
|
"loss": 0.0202, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 4.1900671140939605e-05, |
|
"loss": 0.0165, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 4.1363758389261754e-05, |
|
"loss": 0.021, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 4.0826845637583895e-05, |
|
"loss": 0.0175, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 4.028993288590604e-05, |
|
"loss": 0.019, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"eval_las": 91.65743816906607, |
|
"eval_loss": 0.7897760272026062, |
|
"eval_runtime": 11.7771, |
|
"eval_samples_per_second": 140.867, |
|
"eval_steps_per_second": 17.661, |
|
"eval_uas": 95.08759973876253, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 3.975302013422819e-05, |
|
"loss": 0.015, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 3.921610738255034e-05, |
|
"loss": 0.0166, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 3.867919463087249e-05, |
|
"loss": 0.0165, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 3.8142281879194636e-05, |
|
"loss": 0.0156, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 3.7605369127516784e-05, |
|
"loss": 0.0125, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"eval_las": 91.5183008206264, |
|
"eval_loss": 0.7997450828552246, |
|
"eval_runtime": 11.8137, |
|
"eval_samples_per_second": 140.431, |
|
"eval_steps_per_second": 17.607, |
|
"eval_uas": 94.9995740693415, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 3.706845637583893e-05, |
|
"loss": 0.0155, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 3.653154362416108e-05, |
|
"loss": 0.0155, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 3.599463087248322e-05, |
|
"loss": 0.0142, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 3.545771812080537e-05, |
|
"loss": 0.0096, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 3.492080536912752e-05, |
|
"loss": 0.0132, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"eval_las": 91.67731493312888, |
|
"eval_loss": 0.8170235753059387, |
|
"eval_runtime": 11.7652, |
|
"eval_samples_per_second": 141.009, |
|
"eval_steps_per_second": 17.679, |
|
"eval_uas": 95.12735326688815, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 3.438389261744967e-05, |
|
"loss": 0.0122, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 3.3846979865771815e-05, |
|
"loss": 0.0116, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 3.3310067114093964e-05, |
|
"loss": 0.0114, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"learning_rate": 3.277315436241611e-05, |
|
"loss": 0.0089, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 3.223624161073826e-05, |
|
"loss": 0.0115, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"eval_las": 91.60348695232416, |
|
"eval_loss": 0.8960289359092712, |
|
"eval_runtime": 12.1056, |
|
"eval_samples_per_second": 137.044, |
|
"eval_steps_per_second": 17.182, |
|
"eval_uas": 95.08759973876253, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"step": 9000, |
|
"total_flos": 4.80612874451927e+16, |
|
"train_loss": 0.13298969575431613, |
|
"train_runtime": 5206.9637, |
|
"train_samples_per_second": 92.184, |
|
"train_steps_per_second": 2.881 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 37, |
|
"total_flos": 4.80612874451927e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|