{ "best_metric": 1.072191596031189, "best_model_checkpoint": "/kaggle/output/checkpoint-18000", "epoch": 0.7333767926988266, "eval_steps": 1000, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.7777777777777777e-11, "loss": 1.2184, "step": 1 }, { "epoch": 0.04, "learning_rate": 2.7750000000000004e-08, "loss": 1.1394, "step": 1000 }, { "epoch": 0.04, "eval_accuracy": 0.3327345309381238, "eval_loss": 1.1149410009384155, "eval_runtime": 20.6803, "eval_samples_per_second": 242.26, "eval_steps_per_second": 30.319, "step": 1000 }, { "epoch": 0.08, "learning_rate": 5.5527777777777784e-08, "loss": 1.1141, "step": 2000 }, { "epoch": 0.08, "eval_accuracy": 0.3401197604790419, "eval_loss": 1.104099988937378, "eval_runtime": 20.8477, "eval_samples_per_second": 240.314, "eval_steps_per_second": 30.075, "step": 2000 }, { "epoch": 0.12, "learning_rate": 8.330555555555556e-08, "loss": 1.116, "step": 3000 }, { "epoch": 0.12, "eval_accuracy": 0.3407185628742515, "eval_loss": 1.1040862798690796, "eval_runtime": 20.6818, "eval_samples_per_second": 242.242, "eval_steps_per_second": 30.317, "step": 3000 }, { "epoch": 0.16, "learning_rate": 1.1108333333333333e-07, "loss": 1.1158, "step": 4000 }, { "epoch": 0.16, "eval_accuracy": 0.32894211576846305, "eval_loss": 1.1020556688308716, "eval_runtime": 20.8541, "eval_samples_per_second": 240.241, "eval_steps_per_second": 30.066, "step": 4000 }, { "epoch": 0.2, "learning_rate": 1.3883333333333335e-07, "loss": 1.1135, "step": 5000 }, { "epoch": 0.2, "eval_accuracy": 0.34271457085828344, "eval_loss": 1.1008552312850952, "eval_runtime": 20.8055, "eval_samples_per_second": 240.802, "eval_steps_per_second": 30.136, "step": 5000 }, { "epoch": 0.24, "learning_rate": 1.6658333333333335e-07, "loss": 1.1121, "step": 6000 }, { "epoch": 0.24, "eval_accuracy": 0.3395209580838323, "eval_loss": 1.1004050970077515, "eval_runtime": 20.8985, "eval_samples_per_second": 239.731, "eval_steps_per_second": 30.002, "step": 6000 }, { "epoch": 0.29, "learning_rate": 1.9436111111111112e-07, "loss": 1.1089, "step": 7000 }, { "epoch": 0.29, "eval_accuracy": 0.35788423153692617, "eval_loss": 1.0985721349716187, "eval_runtime": 20.84, "eval_samples_per_second": 240.403, "eval_steps_per_second": 30.086, "step": 7000 }, { "epoch": 0.33, "learning_rate": 2.2213888888888891e-07, "loss": 1.1079, "step": 8000 }, { "epoch": 0.33, "eval_accuracy": 0.3331337325349301, "eval_loss": 1.098374843597412, "eval_runtime": 20.7886, "eval_samples_per_second": 240.998, "eval_steps_per_second": 30.161, "step": 8000 }, { "epoch": 0.37, "learning_rate": 2.4988888888888893e-07, "loss": 1.1087, "step": 9000 }, { "epoch": 0.37, "eval_accuracy": 0.34510978043912177, "eval_loss": 1.0993521213531494, "eval_runtime": 20.782, "eval_samples_per_second": 241.074, "eval_steps_per_second": 30.17, "step": 9000 }, { "epoch": 0.41, "learning_rate": 2.776666666666667e-07, "loss": 1.109, "step": 10000 }, { "epoch": 0.41, "eval_accuracy": 0.3475049900199601, "eval_loss": 1.0967597961425781, "eval_runtime": 20.6798, "eval_samples_per_second": 242.265, "eval_steps_per_second": 30.319, "step": 10000 }, { "epoch": 0.45, "learning_rate": 3.054444444444444e-07, "loss": 1.1052, "step": 11000 }, { "epoch": 0.45, "eval_accuracy": 0.37544910179640717, "eval_loss": 1.0941349267959595, "eval_runtime": 20.8641, "eval_samples_per_second": 240.126, "eval_steps_per_second": 30.052, "step": 11000 }, { "epoch": 0.49, "learning_rate": 3.3322222222222225e-07, "loss": 1.105, "step": 12000 }, { "epoch": 0.49, "eval_accuracy": 0.3834331337325349, "eval_loss": 1.0927647352218628, "eval_runtime": 20.6541, "eval_samples_per_second": 242.567, "eval_steps_per_second": 30.357, "step": 12000 }, { "epoch": 0.53, "learning_rate": 3.609722222222222e-07, "loss": 1.1016, "step": 13000 }, { "epoch": 0.53, "eval_accuracy": 0.3457085828343313, "eval_loss": 1.0942081212997437, "eval_runtime": 21.0733, "eval_samples_per_second": 237.742, "eval_steps_per_second": 29.753, "step": 13000 }, { "epoch": 0.57, "learning_rate": 3.8875e-07, "loss": 1.1031, "step": 14000 }, { "epoch": 0.57, "eval_accuracy": 0.37005988023952097, "eval_loss": 1.0918152332305908, "eval_runtime": 20.9151, "eval_samples_per_second": 239.54, "eval_steps_per_second": 29.978, "step": 14000 }, { "epoch": 0.61, "learning_rate": 4.1652777777777786e-07, "loss": 1.1026, "step": 15000 }, { "epoch": 0.61, "eval_accuracy": 0.3790419161676647, "eval_loss": 1.0895211696624756, "eval_runtime": 21.0591, "eval_samples_per_second": 237.902, "eval_steps_per_second": 29.773, "step": 15000 }, { "epoch": 0.65, "learning_rate": 4.4427777777777783e-07, "loss": 1.0988, "step": 16000 }, { "epoch": 0.65, "eval_accuracy": 0.4101796407185629, "eval_loss": 1.0852997303009033, "eval_runtime": 20.9509, "eval_samples_per_second": 239.131, "eval_steps_per_second": 29.927, "step": 16000 }, { "epoch": 0.69, "learning_rate": 4.720555555555556e-07, "loss": 1.0974, "step": 17000 }, { "epoch": 0.69, "eval_accuracy": 0.43213572854291415, "eval_loss": 1.0791982412338257, "eval_runtime": 20.7526, "eval_samples_per_second": 241.415, "eval_steps_per_second": 30.213, "step": 17000 }, { "epoch": 0.73, "learning_rate": 4.998055555555556e-07, "loss": 1.0932, "step": 18000 }, { "epoch": 0.73, "eval_accuracy": 0.4275449101796407, "eval_loss": 1.072191596031189, "eval_runtime": 21.2435, "eval_samples_per_second": 235.837, "eval_steps_per_second": 29.515, "step": 18000 } ], "logging_steps": 1000, "max_steps": 10000000, "num_train_epochs": 408, "save_steps": 1000, "total_flos": 1.2542810259456e+16, "trial_name": null, "trial_params": null }