{ "best_metric": 0.01498666312545538, "best_model_checkpoint": "./text2sql/codellama_instruct_pt_text2sql/checkpoint-44000", "epoch": 4.9995126366358, "eval_steps": 2000, "global_step": 44880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.1139992573338283e-07, "loss": 1.2191, "step": 1 }, { "epoch": 0.11, "learning_rate": 0.00011139992573338284, "loss": 0.2684, "step": 1000 }, { "epoch": 0.22, "learning_rate": 0.00022279985146676567, "loss": 0.0693, "step": 2000 }, { "epoch": 0.22, "eval_loss": 0.05887996032834053, "eval_runtime": 171.127, "eval_samples_per_second": 11.687, "eval_steps_per_second": 1.461, "step": 2000 }, { "epoch": 0.33, "learning_rate": 0.00029781686301467276, "loss": 0.0565, "step": 3000 }, { "epoch": 0.45, "learning_rate": 0.0002907056676227274, "loss": 0.047, "step": 4000 }, { "epoch": 0.45, "eval_loss": 0.03957173973321915, "eval_runtime": 171.3682, "eval_samples_per_second": 11.671, "eval_steps_per_second": 1.459, "step": 4000 }, { "epoch": 0.56, "learning_rate": 0.00028359447223078195, "loss": 0.0403, "step": 5000 }, { "epoch": 0.67, "learning_rate": 0.0002764832768388366, "loss": 0.0364, "step": 6000 }, { "epoch": 0.67, "eval_loss": 0.030724667012691498, "eval_runtime": 170.9405, "eval_samples_per_second": 11.7, "eval_steps_per_second": 1.462, "step": 6000 }, { "epoch": 0.78, "learning_rate": 0.0002693720814468912, "loss": 0.0341, "step": 7000 }, { "epoch": 0.89, "learning_rate": 0.00026226088605494583, "loss": 0.0311, "step": 8000 }, { "epoch": 0.89, "eval_loss": 0.027806701138615608, "eval_runtime": 171.064, "eval_samples_per_second": 11.692, "eval_steps_per_second": 1.461, "step": 8000 }, { "epoch": 1.0, "learning_rate": 0.00025514969066300046, "loss": 0.0289, "step": 9000 }, { "epoch": 1.11, "learning_rate": 0.00024803849527105503, "loss": 0.0251, "step": 10000 }, { "epoch": 1.11, "eval_loss": 0.024082325398921967, "eval_runtime": 171.4689, "eval_samples_per_second": 11.664, "eval_steps_per_second": 1.458, "step": 10000 }, { "epoch": 1.23, "learning_rate": 0.00024092729987910966, "loss": 0.0242, "step": 11000 }, { "epoch": 1.34, "learning_rate": 0.00023381610448716428, "loss": 0.0243, "step": 12000 }, { "epoch": 1.34, "eval_loss": 0.02279273048043251, "eval_runtime": 171.2147, "eval_samples_per_second": 11.681, "eval_steps_per_second": 1.46, "step": 12000 }, { "epoch": 1.45, "learning_rate": 0.00022670490909521888, "loss": 0.0219, "step": 13000 }, { "epoch": 1.56, "learning_rate": 0.0002195937137032735, "loss": 0.0227, "step": 14000 }, { "epoch": 1.56, "eval_loss": 0.022252563387155533, "eval_runtime": 171.1834, "eval_samples_per_second": 11.683, "eval_steps_per_second": 1.46, "step": 14000 }, { "epoch": 1.67, "learning_rate": 0.00021248251831132813, "loss": 0.0218, "step": 15000 }, { "epoch": 1.78, "learning_rate": 0.00020537132291938273, "loss": 0.0212, "step": 16000 }, { "epoch": 1.78, "eval_loss": 0.020056582987308502, "eval_runtime": 170.9191, "eval_samples_per_second": 11.701, "eval_steps_per_second": 1.463, "step": 16000 }, { "epoch": 1.89, "learning_rate": 0.00019826012752743733, "loss": 0.0208, "step": 17000 }, { "epoch": 2.01, "learning_rate": 0.00019114893213549196, "loss": 0.0202, "step": 18000 }, { "epoch": 2.01, "eval_loss": 0.01821763999760151, "eval_runtime": 171.4736, "eval_samples_per_second": 11.664, "eval_steps_per_second": 1.458, "step": 18000 }, { "epoch": 2.12, "learning_rate": 0.00018403773674354658, "loss": 0.0161, "step": 19000 }, { "epoch": 2.23, "learning_rate": 0.0001769265413516012, "loss": 0.016, "step": 20000 }, { "epoch": 2.23, "eval_loss": 0.018352985382080078, "eval_runtime": 171.2111, "eval_samples_per_second": 11.681, "eval_steps_per_second": 1.46, "step": 20000 }, { "epoch": 2.34, "learning_rate": 0.00016981534595965578, "loss": 0.0155, "step": 21000 }, { "epoch": 2.45, "learning_rate": 0.0001627041505677104, "loss": 0.0156, "step": 22000 }, { "epoch": 2.45, "eval_loss": 0.017868032678961754, "eval_runtime": 171.2047, "eval_samples_per_second": 11.682, "eval_steps_per_second": 1.46, "step": 22000 }, { "epoch": 2.56, "learning_rate": 0.00015559295517576503, "loss": 0.0155, "step": 23000 }, { "epoch": 2.67, "learning_rate": 0.00014848175978381966, "loss": 0.015, "step": 24000 }, { "epoch": 2.67, "eval_loss": 0.017283763736486435, "eval_runtime": 171.6023, "eval_samples_per_second": 11.655, "eval_steps_per_second": 1.457, "step": 24000 }, { "epoch": 2.78, "learning_rate": 0.00014137056439187426, "loss": 0.0153, "step": 25000 }, { "epoch": 2.9, "learning_rate": 0.00013425936899992888, "loss": 0.0147, "step": 26000 }, { "epoch": 2.9, "eval_loss": 0.016522083431482315, "eval_runtime": 171.211, "eval_samples_per_second": 11.681, "eval_steps_per_second": 1.46, "step": 26000 }, { "epoch": 3.01, "learning_rate": 0.00012714817360798348, "loss": 0.0137, "step": 27000 }, { "epoch": 3.12, "learning_rate": 0.00012003697821603811, "loss": 0.0112, "step": 28000 }, { "epoch": 3.12, "eval_loss": 0.016529470682144165, "eval_runtime": 170.9168, "eval_samples_per_second": 11.702, "eval_steps_per_second": 1.463, "step": 28000 }, { "epoch": 3.23, "learning_rate": 0.00011292578282409272, "loss": 0.0112, "step": 29000 }, { "epoch": 3.34, "learning_rate": 0.00010581458743214733, "loss": 0.0109, "step": 30000 }, { "epoch": 3.34, "eval_loss": 0.016144245862960815, "eval_runtime": 171.5226, "eval_samples_per_second": 11.66, "eval_steps_per_second": 1.458, "step": 30000 }, { "epoch": 3.45, "learning_rate": 9.870339204020194e-05, "loss": 0.0111, "step": 31000 }, { "epoch": 3.56, "learning_rate": 9.159219664825657e-05, "loss": 0.0109, "step": 32000 }, { "epoch": 3.56, "eval_loss": 0.015489176847040653, "eval_runtime": 171.4873, "eval_samples_per_second": 11.663, "eval_steps_per_second": 1.458, "step": 32000 }, { "epoch": 3.68, "learning_rate": 8.448100125631117e-05, "loss": 0.0111, "step": 33000 }, { "epoch": 3.79, "learning_rate": 7.73698058643658e-05, "loss": 0.0105, "step": 34000 }, { "epoch": 3.79, "eval_loss": 0.015172351151704788, "eval_runtime": 171.4626, "eval_samples_per_second": 11.664, "eval_steps_per_second": 1.458, "step": 34000 }, { "epoch": 3.9, "learning_rate": 7.025861047242041e-05, "loss": 0.0107, "step": 35000 }, { "epoch": 4.01, "learning_rate": 6.314741508047502e-05, "loss": 0.0104, "step": 36000 }, { "epoch": 4.01, "eval_loss": 0.014995447359979153, "eval_runtime": 171.1146, "eval_samples_per_second": 11.688, "eval_steps_per_second": 1.461, "step": 36000 }, { "epoch": 4.12, "learning_rate": 5.603621968852963e-05, "loss": 0.0081, "step": 37000 }, { "epoch": 4.23, "learning_rate": 4.8925024296584245e-05, "loss": 0.0077, "step": 38000 }, { "epoch": 4.23, "eval_loss": 0.015779005363583565, "eval_runtime": 171.0935, "eval_samples_per_second": 11.69, "eval_steps_per_second": 1.461, "step": 38000 }, { "epoch": 4.34, "learning_rate": 4.181382890463887e-05, "loss": 0.0079, "step": 39000 }, { "epoch": 4.46, "learning_rate": 3.4702633512693484e-05, "loss": 0.0078, "step": 40000 }, { "epoch": 4.46, "eval_loss": 0.015086671337485313, "eval_runtime": 171.2287, "eval_samples_per_second": 11.68, "eval_steps_per_second": 1.46, "step": 40000 }, { "epoch": 4.57, "learning_rate": 2.7591438120748093e-05, "loss": 0.0079, "step": 41000 }, { "epoch": 4.68, "learning_rate": 2.0480242728802712e-05, "loss": 0.0076, "step": 42000 }, { "epoch": 4.68, "eval_loss": 0.015026736073195934, "eval_runtime": 170.9263, "eval_samples_per_second": 11.701, "eval_steps_per_second": 1.463, "step": 42000 }, { "epoch": 4.79, "learning_rate": 1.3369047336857326e-05, "loss": 0.0075, "step": 43000 }, { "epoch": 4.9, "learning_rate": 6.257851944911939e-06, "loss": 0.0077, "step": 44000 }, { "epoch": 4.9, "eval_loss": 0.01498666312545538, "eval_runtime": 171.2767, "eval_samples_per_second": 11.677, "eval_steps_per_second": 1.46, "step": 44000 }, { "epoch": 5.0, "step": 44880, "total_flos": 2.184131254639421e+19, "train_loss": 0.0249389049336447, "train_runtime": 262166.9813, "train_samples_per_second": 5.479, "train_steps_per_second": 0.171 } ], "logging_steps": 1000, "max_steps": 44880, "num_train_epochs": 5, "save_steps": 2000, "total_flos": 2.184131254639421e+19, "trial_name": null, "trial_params": null }