{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 57, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 3e-05, "loss": 2.6941, "step": 1 }, { "epoch": 0.05, "eval_accuracy": 0.062219502243982046, "eval_loss": 2.654296875, "eval_runtime": 3.1639, "eval_samples_per_second": 18.016, "eval_steps_per_second": 1.264, "step": 1 }, { "epoch": 0.11, "learning_rate": 3e-05, "loss": 2.6914, "step": 2 }, { "epoch": 0.11, "eval_accuracy": 0.062219502243982046, "eval_loss": 2.654296875, "eval_runtime": 3.1507, "eval_samples_per_second": 18.091, "eval_steps_per_second": 1.27, "step": 2 }, { "epoch": 0.16, "learning_rate": 2.997722274649974e-05, "loss": 2.6003, "step": 3 }, { "epoch": 0.16, "eval_accuracy": 0.06265016546534294, "eval_loss": 2.6015625, "eval_runtime": 3.124, "eval_samples_per_second": 18.246, "eval_steps_per_second": 1.28, "step": 3 }, { "epoch": 0.21, "learning_rate": 2.9908960159769243e-05, "loss": 2.5603, "step": 4 }, { "epoch": 0.21, "eval_accuracy": 0.06265016546534294, "eval_loss": 2.5703125, "eval_runtime": 3.1396, "eval_samples_per_second": 18.155, "eval_steps_per_second": 1.274, "step": 4 }, { "epoch": 0.26, "learning_rate": 2.9795419551040836e-05, "loss": 2.6072, "step": 5 }, { "epoch": 0.26, "eval_accuracy": 0.06296749626002993, "eval_loss": 2.55078125, "eval_runtime": 2.3207, "eval_samples_per_second": 24.561, "eval_steps_per_second": 1.724, "step": 5 }, { "epoch": 0.32, "learning_rate": 2.9636945739411533e-05, "loss": 2.5444, "step": 6 }, { "epoch": 0.32, "eval_accuracy": 0.06283149734802121, "eval_loss": 2.546875, "eval_runtime": 3.1226, "eval_samples_per_second": 18.254, "eval_steps_per_second": 1.281, "step": 6 }, { "epoch": 0.37, "learning_rate": 2.9434020004638757e-05, "loss": 2.4467, "step": 7 }, { "epoch": 0.37, "eval_accuracy": 0.06292216328936036, "eval_loss": 2.55078125, "eval_runtime": 2.1195, "eval_samples_per_second": 26.894, "eval_steps_per_second": 1.887, "step": 7 }, { "epoch": 0.42, "learning_rate": 2.9187258625509518e-05, "loss": 2.5452, "step": 8 }, { "epoch": 0.42, "eval_accuracy": 0.06287683031869079, "eval_loss": 2.55078125, "eval_runtime": 2.717, "eval_samples_per_second": 20.979, "eval_steps_per_second": 1.472, "step": 8 }, { "epoch": 0.47, "learning_rate": 2.8897411008222026e-05, "loss": 2.6128, "step": 9 }, { "epoch": 0.47, "eval_accuracy": 0.06308082868670384, "eval_loss": 2.544921875, "eval_runtime": 2.9144, "eval_samples_per_second": 19.558, "eval_steps_per_second": 1.372, "step": 9 }, { "epoch": 0.53, "learning_rate": 2.8565357410463664e-05, "loss": 2.4568, "step": 10 }, { "epoch": 0.53, "eval_accuracy": 0.06265016546534294, "eval_loss": 2.5390625, "eval_runtime": 3.1199, "eval_samples_per_second": 18.27, "eval_steps_per_second": 1.282, "step": 10 }, { "epoch": 0.58, "learning_rate": 2.8192106268097336e-05, "loss": 2.5098, "step": 11 }, { "epoch": 0.58, "eval_accuracy": 0.06278616437735164, "eval_loss": 2.53515625, "eval_runtime": 3.1268, "eval_samples_per_second": 18.23, "eval_steps_per_second": 1.279, "step": 11 }, { "epoch": 0.63, "learning_rate": 2.7778791132574908e-05, "loss": 2.6047, "step": 12 }, { "epoch": 0.63, "eval_accuracy": 0.0631261616573734, "eval_loss": 2.5234375, "eval_runtime": 3.1183, "eval_samples_per_second": 18.279, "eval_steps_per_second": 1.283, "step": 12 }, { "epoch": 0.68, "learning_rate": 2.7326667228378677e-05, "loss": 2.5022, "step": 13 }, { "epoch": 0.68, "eval_accuracy": 0.0630128292306995, "eval_loss": 2.515625, "eval_runtime": 2.5192, "eval_samples_per_second": 22.626, "eval_steps_per_second": 1.588, "step": 13 }, { "epoch": 0.74, "learning_rate": 2.6837107640945904e-05, "loss": 2.605, "step": 14 }, { "epoch": 0.74, "eval_accuracy": 0.06326216056938211, "eval_loss": 2.5078125, "eval_runtime": 3.1482, "eval_samples_per_second": 18.105, "eval_steps_per_second": 1.271, "step": 14 }, { "epoch": 0.79, "learning_rate": 2.6311599146653446e-05, "loss": 2.6055, "step": 15 }, { "epoch": 0.79, "eval_accuracy": 0.06335282651072124, "eval_loss": 2.501953125, "eval_runtime": 3.116, "eval_samples_per_second": 18.293, "eval_steps_per_second": 1.284, "step": 15 }, { "epoch": 0.84, "learning_rate": 2.575173769752677e-05, "loss": 2.5061, "step": 16 }, { "epoch": 0.84, "eval_accuracy": 0.06323949408404733, "eval_loss": 2.49609375, "eval_runtime": 2.7142, "eval_samples_per_second": 21.001, "eval_steps_per_second": 1.474, "step": 16 }, { "epoch": 0.89, "learning_rate": 2.5159223574386117e-05, "loss": 2.4348, "step": 17 }, { "epoch": 0.89, "eval_accuracy": 0.06310349517203863, "eval_loss": 2.490234375, "eval_runtime": 3.1245, "eval_samples_per_second": 18.243, "eval_steps_per_second": 1.28, "step": 17 }, { "epoch": 0.95, "learning_rate": 2.4535856223149525e-05, "loss": 2.6284, "step": 18 }, { "epoch": 0.95, "eval_accuracy": 0.06319416111337776, "eval_loss": 2.48828125, "eval_runtime": 3.1284, "eval_samples_per_second": 18.22, "eval_steps_per_second": 1.279, "step": 18 }, { "epoch": 1.0, "learning_rate": 2.3883528789974703e-05, "loss": 2.5574, "step": 19 }, { "epoch": 1.0, "eval_accuracy": 0.06308082868670384, "eval_loss": 2.486328125, "eval_runtime": 2.5211, "eval_samples_per_second": 22.61, "eval_steps_per_second": 1.587, "step": 19 }, { "epoch": 1.05, "learning_rate": 2.320422237183641e-05, "loss": 2.0814, "step": 20 }, { "epoch": 1.05, "eval_accuracy": 0.0632848270547169, "eval_loss": 2.484375, "eval_runtime": 3.128, "eval_samples_per_second": 18.222, "eval_steps_per_second": 1.279, "step": 20 }, { "epoch": 1.11, "learning_rate": 2.25e-05, "loss": 2.0636, "step": 21 }, { "epoch": 1.11, "eval_accuracy": 0.06351149190806474, "eval_loss": 2.484375, "eval_runtime": 3.1237, "eval_samples_per_second": 18.248, "eval_steps_per_second": 1.281, "step": 21 }, { "epoch": 1.16, "learning_rate": 2.25e-05, "loss": 1.9459, "step": 22 }, { "epoch": 1.16, "eval_accuracy": 0.06351149190806474, "eval_loss": 2.484375, "eval_runtime": 2.9479, "eval_samples_per_second": 19.336, "eval_steps_per_second": 1.357, "step": 22 }, { "epoch": 1.21, "learning_rate": 2.177300037466334e-05, "loss": 2.0527, "step": 23 }, { "epoch": 1.21, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.48828125, "eval_runtime": 2.921, "eval_samples_per_second": 19.514, "eval_steps_per_second": 1.369, "step": 23 }, { "epoch": 1.26, "learning_rate": 2.1025431369794546e-05, "loss": 1.8881, "step": 24 }, { "epoch": 1.26, "eval_accuracy": 0.06348882542272995, "eval_loss": 2.49609375, "eval_runtime": 3.1227, "eval_samples_per_second": 18.253, "eval_steps_per_second": 1.281, "step": 24 }, { "epoch": 1.32, "learning_rate": 2.025956332789132e-05, "loss": 1.8668, "step": 25 }, { "epoch": 1.32, "eval_accuracy": 0.06362482433473865, "eval_loss": 2.51171875, "eval_runtime": 3.1172, "eval_samples_per_second": 18.286, "eval_steps_per_second": 1.283, "step": 25 }, { "epoch": 1.37, "learning_rate": 1.9477722165025422e-05, "loss": 2.0375, "step": 26 }, { "epoch": 1.37, "eval_accuracy": 0.06357949136406908, "eval_loss": 2.529296875, "eval_runtime": 2.719, "eval_samples_per_second": 20.963, "eval_steps_per_second": 1.471, "step": 26 }, { "epoch": 1.42, "learning_rate": 1.8682282307111988e-05, "loss": 1.9402, "step": 27 }, { "epoch": 1.42, "eval_accuracy": 0.06319416111337776, "eval_loss": 2.544921875, "eval_runtime": 3.1228, "eval_samples_per_second": 18.253, "eval_steps_per_second": 1.281, "step": 27 }, { "epoch": 1.47, "learning_rate": 1.7875659478856077e-05, "loss": 1.6086, "step": 28 }, { "epoch": 1.47, "eval_accuracy": 0.06333016002538647, "eval_loss": 2.55859375, "eval_runtime": 2.917, "eval_samples_per_second": 19.541, "eval_steps_per_second": 1.371, "step": 28 }, { "epoch": 1.53, "learning_rate": 1.7060303367276123e-05, "loss": 1.8185, "step": 29 }, { "epoch": 1.53, "eval_accuracy": 0.06319416111337776, "eval_loss": 2.564453125, "eval_runtime": 2.7052, "eval_samples_per_second": 21.07, "eval_steps_per_second": 1.479, "step": 29 }, { "epoch": 1.58, "learning_rate": 1.623869018208499e-05, "loss": 1.7324, "step": 30 }, { "epoch": 1.58, "eval_accuracy": 0.0630128292306995, "eval_loss": 2.560546875, "eval_runtime": 3.1208, "eval_samples_per_second": 18.264, "eval_steps_per_second": 1.282, "step": 30 }, { "epoch": 1.63, "learning_rate": 1.5413315135522434e-05, "loss": 1.9285, "step": 31 }, { "epoch": 1.63, "eval_accuracy": 0.06283149734802121, "eval_loss": 2.552734375, "eval_runtime": 3.1183, "eval_samples_per_second": 18.279, "eval_steps_per_second": 1.283, "step": 31 }, { "epoch": 1.68, "learning_rate": 1.4586684864477572e-05, "loss": 1.8031, "step": 32 }, { "epoch": 1.68, "eval_accuracy": 0.06305816220136906, "eval_loss": 2.544921875, "eval_runtime": 2.9132, "eval_samples_per_second": 19.566, "eval_steps_per_second": 1.373, "step": 32 }, { "epoch": 1.74, "learning_rate": 1.3761309817915017e-05, "loss": 1.7321, "step": 33 }, { "epoch": 1.74, "eval_accuracy": 0.0630128292306995, "eval_loss": 2.53515625, "eval_runtime": 2.7189, "eval_samples_per_second": 20.964, "eval_steps_per_second": 1.471, "step": 33 }, { "epoch": 1.79, "learning_rate": 1.2939696632723877e-05, "loss": 1.7802, "step": 34 }, { "epoch": 1.79, "eval_accuracy": 0.0631488281427082, "eval_loss": 2.525390625, "eval_runtime": 3.1376, "eval_samples_per_second": 18.167, "eval_steps_per_second": 1.275, "step": 34 }, { "epoch": 1.84, "learning_rate": 1.2124340521143929e-05, "loss": 2.0637, "step": 35 }, { "epoch": 1.84, "eval_accuracy": 0.06321682759871254, "eval_loss": 2.515625, "eval_runtime": 3.126, "eval_samples_per_second": 18.234, "eval_steps_per_second": 1.28, "step": 35 }, { "epoch": 1.89, "learning_rate": 1.1317717692888014e-05, "loss": 1.8159, "step": 36 }, { "epoch": 1.89, "eval_accuracy": 0.0632848270547169, "eval_loss": 2.5078125, "eval_runtime": 2.5304, "eval_samples_per_second": 22.526, "eval_steps_per_second": 1.581, "step": 36 }, { "epoch": 1.95, "learning_rate": 1.0522277834974586e-05, "loss": 1.7142, "step": 37 }, { "epoch": 1.95, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.50390625, "eval_runtime": 2.9235, "eval_samples_per_second": 19.497, "eval_steps_per_second": 1.368, "step": 37 }, { "epoch": 2.0, "learning_rate": 9.740436672108686e-06, "loss": 1.8793, "step": 38 }, { "epoch": 2.0, "eval_accuracy": 0.06337549299605603, "eval_loss": 2.5, "eval_runtime": 3.1269, "eval_samples_per_second": 18.229, "eval_steps_per_second": 1.279, "step": 38 }, { "epoch": 2.05, "learning_rate": 8.974568630205462e-06, "loss": 1.6914, "step": 39 }, { "epoch": 2.05, "eval_accuracy": 0.06357949136406908, "eval_loss": 2.501953125, "eval_runtime": 2.3189, "eval_samples_per_second": 24.581, "eval_steps_per_second": 1.725, "step": 39 }, { "epoch": 2.11, "learning_rate": 8.226999625336663e-06, "loss": 1.411, "step": 40 }, { "epoch": 2.11, "eval_accuracy": 0.06389682215875607, "eval_loss": 2.50390625, "eval_runtime": 3.1258, "eval_samples_per_second": 18.235, "eval_steps_per_second": 1.28, "step": 40 }, { "epoch": 2.16, "learning_rate": 7.500000000000004e-06, "loss": 1.4182, "step": 41 }, { "epoch": 2.16, "eval_accuracy": 0.0638514891880865, "eval_loss": 2.509765625, "eval_runtime": 3.1169, "eval_samples_per_second": 18.287, "eval_steps_per_second": 1.283, "step": 41 }, { "epoch": 2.21, "learning_rate": 6.795777628163599e-06, "loss": 1.6223, "step": 42 }, { "epoch": 2.21, "eval_accuracy": 0.06378348973208214, "eval_loss": 2.517578125, "eval_runtime": 2.7171, "eval_samples_per_second": 20.978, "eval_steps_per_second": 1.472, "step": 42 }, { "epoch": 2.26, "learning_rate": 6.116471210025302e-06, "loss": 1.623, "step": 43 }, { "epoch": 2.26, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.52734375, "eval_runtime": 2.7121, "eval_samples_per_second": 21.017, "eval_steps_per_second": 1.475, "step": 43 }, { "epoch": 2.32, "learning_rate": 5.464143776850483e-06, "loss": 1.5748, "step": 44 }, { "epoch": 2.32, "eval_accuracy": 0.06344349245206038, "eval_loss": 2.537109375, "eval_runtime": 2.9216, "eval_samples_per_second": 19.51, "eval_steps_per_second": 1.369, "step": 44 }, { "epoch": 2.37, "learning_rate": 4.840776425613887e-06, "loss": 1.7166, "step": 45 }, { "epoch": 2.37, "eval_accuracy": 0.0631488281427082, "eval_loss": 2.546875, "eval_runtime": 3.1159, "eval_samples_per_second": 18.293, "eval_steps_per_second": 1.284, "step": 45 }, { "epoch": 2.42, "learning_rate": 4.248262302473233e-06, "loss": 1.3432, "step": 46 }, { "epoch": 2.42, "eval_accuracy": 0.0629901627453647, "eval_loss": 2.556640625, "eval_runtime": 2.9081, "eval_samples_per_second": 19.6, "eval_steps_per_second": 1.375, "step": 46 }, { "epoch": 2.47, "learning_rate": 3.688400853346558e-06, "loss": 1.5325, "step": 47 }, { "epoch": 2.47, "eval_accuracy": 0.06305816220136906, "eval_loss": 2.564453125, "eval_runtime": 2.9197, "eval_samples_per_second": 19.522, "eval_steps_per_second": 1.37, "step": 47 }, { "epoch": 2.53, "learning_rate": 3.162892359054098e-06, "loss": 1.5076, "step": 48 }, { "epoch": 2.53, "eval_accuracy": 0.062854163833356, "eval_loss": 2.572265625, "eval_runtime": 3.1077, "eval_samples_per_second": 18.341, "eval_steps_per_second": 1.287, "step": 48 }, { "epoch": 2.58, "learning_rate": 2.673332771621324e-06, "loss": 1.6636, "step": 49 }, { "epoch": 2.58, "eval_accuracy": 0.06274083140668207, "eval_loss": 2.578125, "eval_runtime": 2.5045, "eval_samples_per_second": 22.759, "eval_steps_per_second": 1.597, "step": 49 }, { "epoch": 2.63, "learning_rate": 2.221208867425096e-06, "loss": 1.2897, "step": 50 }, { "epoch": 2.63, "eval_accuracy": 0.06267283195067773, "eval_loss": 2.583984375, "eval_runtime": 3.1236, "eval_samples_per_second": 18.248, "eval_steps_per_second": 1.281, "step": 50 }, { "epoch": 2.68, "learning_rate": 1.8078937319026655e-06, "loss": 1.4559, "step": 51 }, { "epoch": 2.68, "eval_accuracy": 0.06265016546534294, "eval_loss": 2.587890625, "eval_runtime": 2.523, "eval_samples_per_second": 22.592, "eval_steps_per_second": 1.585, "step": 51 }, { "epoch": 2.74, "learning_rate": 1.4346425895363385e-06, "loss": 1.3904, "step": 52 }, { "epoch": 2.74, "eval_accuracy": 0.06267283195067773, "eval_loss": 2.58984375, "eval_runtime": 3.1182, "eval_samples_per_second": 18.28, "eval_steps_per_second": 1.283, "step": 52 }, { "epoch": 2.79, "learning_rate": 1.1025889917779735e-06, "loss": 1.4961, "step": 53 }, { "epoch": 2.79, "eval_accuracy": 0.0625594995240038, "eval_loss": 2.591796875, "eval_runtime": 2.3139, "eval_samples_per_second": 24.633, "eval_steps_per_second": 1.729, "step": 53 }, { "epoch": 2.84, "learning_rate": 8.127413744904805e-07, "loss": 1.5276, "step": 54 }, { "epoch": 2.84, "eval_accuracy": 0.06253683303866903, "eval_loss": 2.59375, "eval_runtime": 2.5424, "eval_samples_per_second": 22.42, "eval_steps_per_second": 1.573, "step": 54 }, { "epoch": 2.89, "learning_rate": 5.659799953612438e-07, "loss": 1.3479, "step": 55 }, { "epoch": 2.89, "eval_accuracy": 0.062468833582664675, "eval_loss": 2.595703125, "eval_runtime": 3.124, "eval_samples_per_second": 18.246, "eval_steps_per_second": 1.28, "step": 55 }, { "epoch": 2.95, "learning_rate": 3.630542605884657e-07, "loss": 1.4094, "step": 56 }, { "epoch": 2.95, "eval_accuracy": 0.06242350061199511, "eval_loss": 2.595703125, "eval_runtime": 2.3282, "eval_samples_per_second": 24.482, "eval_steps_per_second": 1.718, "step": 56 }, { "epoch": 3.0, "learning_rate": 2.0458044895916516e-07, "loss": 1.5486, "step": 57 }, { "epoch": 3.0, "eval_accuracy": 0.06235550115599075, "eval_loss": 2.595703125, "eval_runtime": 2.9026, "eval_samples_per_second": 19.637, "eval_steps_per_second": 1.378, "step": 57 }, { "epoch": 3.0, "step": 57, "total_flos": 3114896719872.0, "train_loss": 1.9830793414199561, "train_runtime": 617.3135, "train_samples_per_second": 1.473, "train_steps_per_second": 0.092 } ], "max_steps": 57, "num_train_epochs": 3, "total_flos": 3114896719872.0, "trial_name": null, "trial_params": null }