{ "best_metric": 0.8023715415019763, "best_model_checkpoint": "./toxicity_c_202201181030/checkpoint-3000", "epoch": 1.2414895617829602, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.94e-05, "loss": 0.2336, "step": 100 }, { "epoch": 0.04, "eval_accuracy": 0.9445713034502174, "eval_f1": 0.6696495152870993, "eval_loss": 0.1531878262758255, "eval_precision": 0.8655421686746988, "eval_recall": 0.5460626330191548, "eval_runtime": 209.3715, "eval_samples_per_second": 152.69, "eval_steps_per_second": 19.09, "step": 100 }, { "epoch": 0.08, "learning_rate": 2.88e-05, "loss": 0.1549, "step": 200 }, { "epoch": 0.08, "eval_accuracy": 0.9456973943507773, "eval_f1": 0.6756352765321375, "eval_loss": 0.1629406213760376, "eval_precision": 0.8763936015511391, "eval_recall": 0.5497111584068106, "eval_runtime": 209.809, "eval_samples_per_second": 152.372, "eval_steps_per_second": 19.051, "step": 200 }, { "epoch": 0.11, "learning_rate": 2.8199999999999998e-05, "loss": 0.158, "step": 300 }, { "epoch": 0.11, "eval_accuracy": 0.949920235227877, "eval_f1": 0.7275820997107366, "eval_loss": 0.13407668471336365, "eval_precision": 0.8261205564142194, "eval_recall": 0.6500456065673457, "eval_runtime": 210.4551, "eval_samples_per_second": 151.904, "eval_steps_per_second": 18.992, "step": 300 }, { "epoch": 0.15, "learning_rate": 2.7600000000000003e-05, "loss": 0.1321, "step": 400 }, { "epoch": 0.15, "eval_accuracy": 0.9515468109731302, "eval_f1": 0.7355301348813386, "eval_loss": 0.1435689479112625, "eval_precision": 0.8387850467289719, "eval_recall": 0.6549103070842202, "eval_runtime": 209.6366, "eval_samples_per_second": 152.497, "eval_steps_per_second": 19.066, "step": 400 }, { "epoch": 0.19, "learning_rate": 2.7000000000000002e-05, "loss": 0.1408, "step": 500 }, { "epoch": 0.19, "eval_accuracy": 0.9473865307016172, "eval_f1": 0.7431276725717777, "eval_loss": 0.13682132959365845, "eval_precision": 0.7465480208652961, "eval_recall": 0.739738522347218, "eval_runtime": 210.8965, "eval_samples_per_second": 151.586, "eval_steps_per_second": 18.952, "step": 500 }, { "epoch": 0.23, "learning_rate": 2.64e-05, "loss": 0.1385, "step": 600 }, { "epoch": 0.23, "eval_accuracy": 0.9507960837060903, "eval_f1": 0.7527118377613583, "eval_loss": 0.12854768335819244, "eval_precision": 0.779296875, "eval_recall": 0.7278808148373366, "eval_runtime": 210.2245, "eval_samples_per_second": 152.071, "eval_steps_per_second": 19.013, "step": 600 }, { "epoch": 0.26, "learning_rate": 2.58e-05, "loss": 0.1313, "step": 700 }, { "epoch": 0.26, "eval_accuracy": 0.9529857049016235, "eval_f1": 0.7370078740157481, "eval_loss": 0.14518482983112335, "eval_precision": 0.8680956306677658, "eval_recall": 0.6403162055335968, "eval_runtime": 210.6173, "eval_samples_per_second": 151.787, "eval_steps_per_second": 18.978, "step": 700 }, { "epoch": 0.3, "learning_rate": 2.52e-05, "loss": 0.1399, "step": 800 }, { "epoch": 0.3, "eval_accuracy": 0.949826394319497, "eval_f1": 0.7610250297973777, "eval_loss": 0.13412030041217804, "eval_precision": 0.7461291264972246, "eval_recall": 0.7765278200060809, "eval_runtime": 209.9319, "eval_samples_per_second": 152.283, "eval_steps_per_second": 19.04, "step": 800 }, { "epoch": 0.34, "learning_rate": 2.4599999999999998e-05, "loss": 0.1272, "step": 900 }, { "epoch": 0.34, "eval_accuracy": 0.9536425912602834, "eval_f1": 0.7640878701050622, "eval_loss": 0.12946395576000214, "eval_precision": 0.8018710324089542, "eval_recall": 0.7297050775311645, "eval_runtime": 210.1105, "eval_samples_per_second": 152.153, "eval_steps_per_second": 19.023, "step": 900 }, { "epoch": 0.38, "learning_rate": 2.4e-05, "loss": 0.1324, "step": 1000 }, { "epoch": 0.38, "eval_accuracy": 0.9478244549407239, "eval_f1": 0.7562116340251388, "eval_loss": 0.14241644740104675, "eval_precision": 0.7281170841542358, "eval_recall": 0.7865612648221344, "eval_runtime": 210.5453, "eval_samples_per_second": 151.839, "eval_steps_per_second": 18.984, "step": 1000 }, { "epoch": 0.41, "learning_rate": 2.3400000000000003e-05, "loss": 0.1198, "step": 1100 }, { "epoch": 0.41, "eval_accuracy": 0.9530482655072101, "eval_f1": 0.7681137030743087, "eval_loss": 0.1265845000743866, "eval_precision": 0.7807788944723618, "eval_recall": 0.7558528428093646, "eval_runtime": 210.173, "eval_samples_per_second": 152.108, "eval_steps_per_second": 19.018, "step": 1100 }, { "epoch": 0.45, "learning_rate": 2.2800000000000002e-05, "loss": 0.1232, "step": 1200 }, { "epoch": 0.45, "eval_accuracy": 0.9558634927586099, "eval_f1": 0.7611308616895208, "eval_loss": 0.11988817900419235, "eval_precision": 0.8586707410236822, "eval_recall": 0.6834904226208574, "eval_runtime": 209.5961, "eval_samples_per_second": 152.527, "eval_steps_per_second": 19.07, "step": 1200 }, { "epoch": 0.49, "learning_rate": 2.22e-05, "loss": 0.1239, "step": 1300 }, { "epoch": 0.49, "eval_accuracy": 0.9564578185116832, "eval_f1": 0.7667560321715818, "eval_loss": 0.12424681335687637, "eval_precision": 0.8540500186636805, "eval_recall": 0.6956521739130435, "eval_runtime": 210.1306, "eval_samples_per_second": 152.139, "eval_steps_per_second": 19.022, "step": 1300 }, { "epoch": 0.53, "learning_rate": 2.16e-05, "loss": 0.1236, "step": 1400 }, { "epoch": 0.53, "eval_accuracy": 0.9570834245675498, "eval_f1": 0.7704918032786885, "eval_loss": 0.1350642293691635, "eval_precision": 0.8564522127184827, "eval_recall": 0.7002128306476133, "eval_runtime": 210.8014, "eval_samples_per_second": 151.655, "eval_steps_per_second": 18.961, "step": 1400 }, { "epoch": 0.56, "learning_rate": 2.1e-05, "loss": 0.1254, "step": 1500 }, { "epoch": 0.56, "eval_accuracy": 0.9575526291094498, "eval_f1": 0.7775774463202754, "eval_loss": 0.11828587204217911, "eval_precision": 0.8435277382645804, "eval_recall": 0.7211918516266342, "eval_runtime": 209.8683, "eval_samples_per_second": 152.329, "eval_steps_per_second": 19.045, "step": 1500 }, { "epoch": 0.6, "learning_rate": 2.04e-05, "loss": 0.1252, "step": 1600 }, { "epoch": 0.6, "eval_accuracy": 0.9568644624479965, "eval_f1": 0.7638294228463779, "eval_loss": 0.12166079878807068, "eval_precision": 0.8745098039215686, "eval_recall": 0.6780176345393737, "eval_runtime": 210.147, "eval_samples_per_second": 152.127, "eval_steps_per_second": 19.02, "step": 1600 }, { "epoch": 0.64, "learning_rate": 1.98e-05, "loss": 0.1256, "step": 1700 }, { "epoch": 0.64, "eval_accuracy": 0.9574275078982765, "eval_f1": 0.7709910819451455, "eval_loss": 0.11569029092788696, "eval_precision": 0.8632253202712886, "eval_recall": 0.6965643052599574, "eval_runtime": 210.5963, "eval_samples_per_second": 151.802, "eval_steps_per_second": 18.979, "step": 1700 }, { "epoch": 0.68, "learning_rate": 1.9200000000000003e-05, "loss": 0.1324, "step": 1800 }, { "epoch": 0.68, "eval_accuracy": 0.95426819731615, "eval_f1": 0.7747303543913714, "eval_loss": 0.12124760448932648, "eval_precision": 0.7853795688847235, "eval_recall": 0.7643660687138948, "eval_runtime": 210.0339, "eval_samples_per_second": 152.209, "eval_steps_per_second": 19.03, "step": 1800 }, { "epoch": 0.71, "learning_rate": 1.86e-05, "loss": 0.1192, "step": 1900 }, { "epoch": 0.71, "eval_accuracy": 0.9576777503206231, "eval_f1": 0.7719534805326143, "eval_loss": 0.12099746614694595, "eval_precision": 0.8661119515885023, "eval_recall": 0.6962602614776527, "eval_runtime": 209.8273, "eval_samples_per_second": 152.359, "eval_steps_per_second": 19.049, "step": 1900 }, { "epoch": 0.75, "learning_rate": 1.8e-05, "loss": 0.1234, "step": 2000 }, { "epoch": 0.75, "eval_accuracy": 0.9573023866871031, "eval_f1": 0.781074578989575, "eval_loss": 0.11586691439151764, "eval_precision": 0.8265444670739986, "eval_recall": 0.7403466099118273, "eval_runtime": 209.7848, "eval_samples_per_second": 152.39, "eval_steps_per_second": 19.053, "step": 2000 }, { "epoch": 0.79, "learning_rate": 1.74e-05, "loss": 0.1129, "step": 2100 }, { "epoch": 0.79, "eval_accuracy": 0.9564578185116832, "eval_f1": 0.7782733354571519, "eval_loss": 0.12270316481590271, "eval_precision": 0.8173302107728337, "eval_recall": 0.7427789601702646, "eval_runtime": 209.6077, "eval_samples_per_second": 152.518, "eval_steps_per_second": 19.069, "step": 2100 }, { "epoch": 0.83, "learning_rate": 1.6800000000000002e-05, "loss": 0.1244, "step": 2200 }, { "epoch": 0.83, "eval_accuracy": 0.9576777503206231, "eval_f1": 0.776769509981851, "eval_loss": 0.11879286915063858, "eval_precision": 0.8492063492063492, "eval_recall": 0.7157190635451505, "eval_runtime": 209.8694, "eval_samples_per_second": 152.328, "eval_steps_per_second": 19.045, "step": 2200 }, { "epoch": 0.87, "learning_rate": 1.62e-05, "loss": 0.1198, "step": 2300 }, { "epoch": 0.87, "eval_accuracy": 0.9573336669898964, "eval_f1": 0.7855345911949686, "eval_loss": 0.12149158120155334, "eval_precision": 0.8134158254640182, "eval_recall": 0.7595013681970204, "eval_runtime": 209.8421, "eval_samples_per_second": 152.348, "eval_steps_per_second": 19.048, "step": 2300 }, { "epoch": 0.9, "learning_rate": 1.56e-05, "loss": 0.1167, "step": 2400 }, { "epoch": 0.9, "eval_accuracy": 0.9582095154681097, "eval_f1": 0.7829054273643159, "eval_loss": 0.11379627883434296, "eval_precision": 0.8408376963350785, "eval_recall": 0.7324414715719063, "eval_runtime": 210.0964, "eval_samples_per_second": 152.163, "eval_steps_per_second": 19.025, "step": 2400 }, { "epoch": 0.94, "learning_rate": 1.5e-05, "loss": 0.1162, "step": 2500 }, { "epoch": 0.94, "eval_accuracy": 0.9579592730457631, "eval_f1": 0.7870722433460076, "eval_loss": 0.11324156820774078, "eval_precision": 0.8217002977174992, "eval_recall": 0.7552447552447552, "eval_runtime": 210.2617, "eval_samples_per_second": 152.044, "eval_steps_per_second": 19.01, "step": 2500 }, { "epoch": 0.98, "learning_rate": 1.44e-05, "loss": 0.1202, "step": 2600 }, { "epoch": 0.98, "eval_accuracy": 0.9577403109262098, "eval_f1": 0.785385226370135, "eval_loss": 0.11547254770994186, "eval_precision": 0.8223552894211577, "eval_recall": 0.7515962298570994, "eval_runtime": 209.2873, "eval_samples_per_second": 152.752, "eval_steps_per_second": 19.098, "step": 2600 }, { "epoch": 1.02, "learning_rate": 1.3800000000000002e-05, "loss": 0.1069, "step": 2700 }, { "epoch": 1.02, "eval_accuracy": 0.9589289624323564, "eval_f1": 0.7883282282766404, "eval_loss": 0.11415638774633408, "eval_precision": 0.8390528483184626, "eval_recall": 0.7433870477348739, "eval_runtime": 209.3049, "eval_samples_per_second": 152.739, "eval_steps_per_second": 19.097, "step": 2700 }, { "epoch": 1.05, "learning_rate": 1.32e-05, "loss": 0.0866, "step": 2800 }, { "epoch": 1.05, "eval_accuracy": 0.9558947730614032, "eval_f1": 0.7875866224766495, "eval_loss": 0.12708334624767303, "eval_precision": 0.7805315019408778, "eval_recall": 0.7947704469443599, "eval_runtime": 208.8539, "eval_samples_per_second": 153.069, "eval_steps_per_second": 19.138, "step": 2800 }, { "epoch": 1.09, "learning_rate": 1.26e-05, "loss": 0.0913, "step": 2900 }, { "epoch": 1.09, "eval_accuracy": 0.9557696518502299, "eval_f1": 0.7855626326963907, "eval_loss": 0.12602387368679047, "eval_precision": 0.783661119515885, "eval_recall": 0.7874733961690483, "eval_runtime": 209.1412, "eval_samples_per_second": 152.858, "eval_steps_per_second": 19.111, "step": 2900 }, { "epoch": 1.13, "learning_rate": 1.2e-05, "loss": 0.087, "step": 3000 }, { "epoch": 1.13, "eval_accuracy": 0.95548812912509, "eval_f1": 0.7876436352783167, "eval_loss": 0.12803590297698975, "eval_precision": 0.7734466588511137, "eval_recall": 0.8023715415019763, "eval_runtime": 209.0203, "eval_samples_per_second": 152.947, "eval_steps_per_second": 19.123, "step": 3000 }, { "epoch": 1.17, "learning_rate": 1.1400000000000001e-05, "loss": 0.0839, "step": 3100 }, { "epoch": 1.17, "eval_accuracy": 0.958991523037943, "eval_f1": 0.7922016167379933, "eval_loss": 0.12513290345668793, "eval_precision": 0.8274834437086093, "eval_recall": 0.759805411979325, "eval_runtime": 209.1866, "eval_samples_per_second": 152.825, "eval_steps_per_second": 19.107, "step": 3100 }, { "epoch": 1.2, "learning_rate": 1.08e-05, "loss": 0.0889, "step": 3200 }, { "epoch": 1.2, "eval_accuracy": 0.9583033563764898, "eval_f1": 0.7890489001424276, "eval_loss": 0.12521126866340637, "eval_precision": 0.8227722772277227, "eval_recall": 0.7579811492854971, "eval_runtime": 209.1839, "eval_samples_per_second": 152.827, "eval_steps_per_second": 19.108, "step": 3200 }, { "epoch": 1.24, "learning_rate": 1.02e-05, "loss": 0.0887, "step": 3300 }, { "epoch": 1.24, "eval_accuracy": 0.9562701366949232, "eval_f1": 0.7879890809827116, "eval_loss": 0.12457986176013947, "eval_precision": 0.7860816944024206, "eval_recall": 0.7899057464274856, "eval_runtime": 209.6152, "eval_samples_per_second": 152.513, "eval_steps_per_second": 19.068, "step": 3300 } ], "max_steps": 5000, "num_train_epochs": 2, "total_flos": 2.279247411490296e+16, "trial_name": null, "trial_params": null }