{ "best_metric": 0.0006635845638811588, "best_model_checkpoint": "./megumin_results/checkpoint-2500", "epoch": 4.805382027871215, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19221528111484862, "grad_norm": 4.030124664306641, "learning_rate": 3.96e-06, "loss": 1.2166, "step": 100 }, { "epoch": 0.38443056222969724, "grad_norm": 0.825600266456604, "learning_rate": 7.960000000000002e-06, "loss": 0.2925, "step": 200 }, { "epoch": 0.5766458433445459, "grad_norm": 0.7094722986221313, "learning_rate": 1.1920000000000001e-05, "loss": 0.0769, "step": 300 }, { "epoch": 0.7688611244593945, "grad_norm": 0.23390699923038483, "learning_rate": 1.5920000000000003e-05, "loss": 0.0445, "step": 400 }, { "epoch": 0.9610764055742431, "grad_norm": 0.8664096593856812, "learning_rate": 1.9920000000000002e-05, "loss": 0.0304, "step": 500 }, { "epoch": 0.9610764055742431, "eval_loss": 0.0065709324553608894, "eval_runtime": 116.7786, "eval_samples_per_second": 15.842, "eval_steps_per_second": 1.987, "step": 500 }, { "epoch": 1.1532916866890919, "grad_norm": 0.970707356929779, "learning_rate": 1.9892723329629885e-05, "loss": 0.0215, "step": 600 }, { "epoch": 1.3455069678039404, "grad_norm": 0.19242490828037262, "learning_rate": 1.9564504327386318e-05, "loss": 0.0154, "step": 700 }, { "epoch": 1.5377222489187892, "grad_norm": 0.1774681955575943, "learning_rate": 1.9022630103336575e-05, "loss": 0.0161, "step": 800 }, { "epoch": 1.7299375300336377, "grad_norm": 0.5783206224441528, "learning_rate": 1.8279205232225727e-05, "loss": 0.0127, "step": 900 }, { "epoch": 1.9221528111484862, "grad_norm": 0.2338998168706894, "learning_rate": 1.7350836597201767e-05, "loss": 0.0118, "step": 1000 }, { "epoch": 1.9221528111484862, "eval_loss": 0.001669618533924222, "eval_runtime": 116.8653, "eval_samples_per_second": 15.83, "eval_steps_per_second": 1.985, "step": 1000 }, { "epoch": 2.114368092263335, "grad_norm": 0.14343367516994476, "learning_rate": 1.625826241948815e-05, "loss": 0.0091, "step": 1100 }, { "epoch": 2.3065833733781838, "grad_norm": 0.2039719969034195, "learning_rate": 1.502588900079051e-05, "loss": 0.0094, "step": 1200 }, { "epoch": 2.498798654493032, "grad_norm": 0.15303683280944824, "learning_rate": 1.3681245526846782e-05, "loss": 0.0082, "step": 1300 }, { "epoch": 2.691013935607881, "grad_norm": 0.07792511582374573, "learning_rate": 1.2254369110908413e-05, "loss": 0.0083, "step": 1400 }, { "epoch": 2.8832292167227296, "grad_norm": 0.07674787193536758, "learning_rate": 1.0777133814265167e-05, "loss": 0.0073, "step": 1500 }, { "epoch": 2.8832292167227296, "eval_loss": 0.0009687193087302148, "eval_runtime": 116.8145, "eval_samples_per_second": 15.837, "eval_steps_per_second": 1.986, "step": 1500 }, { "epoch": 3.075444497837578, "grad_norm": 0.25639790296554565, "learning_rate": 9.282538632386208e-06, "loss": 0.0063, "step": 1600 }, { "epoch": 3.2676597789524267, "grad_norm": 0.21751342713832855, "learning_rate": 7.80397035189052e-06, "loss": 0.0056, "step": 1700 }, { "epoch": 3.4598750600672754, "grad_norm": 0.14014215767383575, "learning_rate": 6.3744577449038415e-06, "loss": 0.0049, "step": 1800 }, { "epoch": 3.6520903411821237, "grad_norm": 1.104333519935608, "learning_rate": 5.025933760867782e-06, "loss": 0.0059, "step": 1900 }, { "epoch": 3.8443056222969725, "grad_norm": 0.16992008686065674, "learning_rate": 3.7885221972168974e-06, "loss": 0.005, "step": 2000 }, { "epoch": 3.8443056222969725, "eval_loss": 0.0007113968022167683, "eval_runtime": 116.804, "eval_samples_per_second": 15.838, "eval_steps_per_second": 1.986, "step": 2000 }, { "epoch": 4.036520903411821, "grad_norm": 0.9393255114555359, "learning_rate": 2.689864783522098e-06, "loss": 0.0054, "step": 2100 }, { "epoch": 4.22873618452667, "grad_norm": 0.24965894222259521, "learning_rate": 1.7545037109285946e-06, "loss": 0.005, "step": 2200 }, { "epoch": 4.420951465641519, "grad_norm": 0.2450593262910843, "learning_rate": 1.0033334001604833e-06, "loss": 0.0059, "step": 2300 }, { "epoch": 4.6131667467563675, "grad_norm": 0.5003035068511963, "learning_rate": 4.5313375468875155e-07, "loss": 0.005, "step": 2400 }, { "epoch": 4.805382027871215, "grad_norm": 0.019921617582440376, "learning_rate": 1.1619532541569333e-07, "loss": 0.0042, "step": 2500 }, { "epoch": 4.805382027871215, "eval_loss": 0.0006635845638811588, "eval_runtime": 116.6762, "eval_samples_per_second": 15.856, "eval_steps_per_second": 1.988, "step": 2500 } ], "logging_steps": 100, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.822138548224e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }