{ "best_metric": 6.5507588386535645, "best_model_checkpoint": "poetry-author/checkpoint-4960", "epoch": 4.0, "eval_steps": 500, "global_step": 4960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 2.4193548387096776e-06, "loss": 7.2029, "step": 62 }, { "epoch": 0.1, "learning_rate": 4.919354838709678e-06, "loss": 7.1816, "step": 124 }, { "epoch": 0.15, "learning_rate": 7.419354838709678e-06, "loss": 7.1418, "step": 186 }, { "epoch": 0.2, "learning_rate": 9.919354838709679e-06, "loss": 7.1678, "step": 248 }, { "epoch": 0.25, "learning_rate": 1.2379032258064517e-05, "loss": 7.1274, "step": 310 }, { "epoch": 0.3, "learning_rate": 1.4879032258064519e-05, "loss": 7.1285, "step": 372 }, { "epoch": 0.35, "learning_rate": 1.7379032258064517e-05, "loss": 7.108, "step": 434 }, { "epoch": 0.4, "learning_rate": 1.9879032258064516e-05, "loss": 7.0809, "step": 496 }, { "epoch": 0.45, "learning_rate": 2.2379032258064516e-05, "loss": 7.1026, "step": 558 }, { "epoch": 0.5, "learning_rate": 2.4879032258064516e-05, "loss": 7.0691, "step": 620 }, { "epoch": 0.55, "learning_rate": 2.737903225806452e-05, "loss": 7.0714, "step": 682 }, { "epoch": 0.6, "learning_rate": 2.9879032258064516e-05, "loss": 7.0847, "step": 744 }, { "epoch": 0.65, "learning_rate": 3.2379032258064515e-05, "loss": 7.0491, "step": 806 }, { "epoch": 0.7, "learning_rate": 3.487903225806452e-05, "loss": 7.0685, "step": 868 }, { "epoch": 0.75, "learning_rate": 3.7379032258064515e-05, "loss": 7.038, "step": 930 }, { "epoch": 0.8, "learning_rate": 3.987903225806452e-05, "loss": 7.0614, "step": 992 }, { "epoch": 0.85, "learning_rate": 4.2379032258064514e-05, "loss": 7.0234, "step": 1054 }, { "epoch": 0.9, "learning_rate": 4.487903225806452e-05, "loss": 6.9863, "step": 1116 }, { "epoch": 0.95, "learning_rate": 4.737903225806452e-05, "loss": 6.9664, "step": 1178 }, { "epoch": 1.0, "learning_rate": 4.987903225806452e-05, "loss": 6.9782, "step": 1240 }, { "epoch": 1.0, "eval_accuracy": 0.02056451612903226, "eval_f1_macro": 0.0006143466739070911, "eval_f1_micro": 0.02056451612903226, "eval_f1_weighted": 0.004146182182163288, "eval_loss": 6.874168395996094, "eval_precision_macro": 0.0003535209813895348, "eval_precision_micro": 0.02056451612903226, "eval_precision_weighted": 0.0024119848598185387, "eval_recall_macro": 0.0037010590753155195, "eval_recall_micro": 0.02056451612903226, "eval_recall_weighted": 0.02056451612903226, "eval_runtime": 5.5652, "eval_samples_per_second": 445.625, "eval_steps_per_second": 27.852, "step": 1240 }, { "epoch": 1.05, "learning_rate": 4.9735663082437276e-05, "loss": 6.9144, "step": 1302 }, { "epoch": 1.1, "learning_rate": 4.94578853046595e-05, "loss": 6.8315, "step": 1364 }, { "epoch": 1.15, "learning_rate": 4.9180107526881726e-05, "loss": 6.8663, "step": 1426 }, { "epoch": 1.2, "learning_rate": 4.89068100358423e-05, "loss": 6.837, "step": 1488 }, { "epoch": 1.25, "learning_rate": 4.862903225806452e-05, "loss": 6.7745, "step": 1550 }, { "epoch": 1.3, "learning_rate": 4.835125448028674e-05, "loss": 6.8747, "step": 1612 }, { "epoch": 1.35, "learning_rate": 4.807347670250896e-05, "loss": 6.8005, "step": 1674 }, { "epoch": 1.4, "learning_rate": 4.7795698924731186e-05, "loss": 6.8131, "step": 1736 }, { "epoch": 1.45, "learning_rate": 4.751792114695341e-05, "loss": 6.7554, "step": 1798 }, { "epoch": 1.5, "learning_rate": 4.724014336917563e-05, "loss": 6.774, "step": 1860 }, { "epoch": 1.55, "learning_rate": 4.696236559139785e-05, "loss": 6.7964, "step": 1922 }, { "epoch": 1.6, "learning_rate": 4.6684587813620074e-05, "loss": 6.8653, "step": 1984 }, { "epoch": 1.65, "learning_rate": 4.64068100358423e-05, "loss": 6.8209, "step": 2046 }, { "epoch": 1.7, "learning_rate": 4.612903225806452e-05, "loss": 6.7755, "step": 2108 }, { "epoch": 1.75, "learning_rate": 4.585125448028674e-05, "loss": 6.8384, "step": 2170 }, { "epoch": 1.8, "learning_rate": 4.557347670250896e-05, "loss": 6.7932, "step": 2232 }, { "epoch": 1.85, "learning_rate": 4.5295698924731187e-05, "loss": 6.8017, "step": 2294 }, { "epoch": 1.9, "learning_rate": 4.5017921146953405e-05, "loss": 6.7575, "step": 2356 }, { "epoch": 1.95, "learning_rate": 4.474014336917563e-05, "loss": 6.7087, "step": 2418 }, { "epoch": 2.0, "learning_rate": 4.4462365591397856e-05, "loss": 6.8306, "step": 2480 }, { "epoch": 2.0, "eval_accuracy": 0.027822580645161292, "eval_f1_macro": 0.002895880527713299, "eval_f1_micro": 0.027822580645161292, "eval_f1_weighted": 0.011518078961075304, "eval_loss": 6.696753025054932, "eval_precision_macro": 0.002399325952306467, "eval_precision_micro": 0.027822580645161292, "eval_precision_weighted": 0.008800287744676547, "eval_recall_macro": 0.005697363214948901, "eval_recall_micro": 0.027822580645161292, "eval_recall_weighted": 0.027822580645161292, "eval_runtime": 5.6109, "eval_samples_per_second": 441.997, "eval_steps_per_second": 27.625, "step": 2480 }, { "epoch": 2.05, "learning_rate": 4.4184587813620074e-05, "loss": 6.5756, "step": 2542 }, { "epoch": 2.1, "learning_rate": 4.390681003584229e-05, "loss": 6.5611, "step": 2604 }, { "epoch": 2.15, "learning_rate": 4.362903225806452e-05, "loss": 6.6159, "step": 2666 }, { "epoch": 2.2, "learning_rate": 4.335125448028674e-05, "loss": 6.5746, "step": 2728 }, { "epoch": 2.25, "learning_rate": 4.307347670250896e-05, "loss": 6.4498, "step": 2790 }, { "epoch": 2.3, "learning_rate": 4.279569892473119e-05, "loss": 6.5829, "step": 2852 }, { "epoch": 2.35, "learning_rate": 4.2517921146953405e-05, "loss": 6.4411, "step": 2914 }, { "epoch": 2.4, "learning_rate": 4.224014336917563e-05, "loss": 6.5321, "step": 2976 }, { "epoch": 2.45, "learning_rate": 4.196236559139785e-05, "loss": 6.5179, "step": 3038 }, { "epoch": 2.5, "learning_rate": 4.1684587813620074e-05, "loss": 6.5047, "step": 3100 }, { "epoch": 2.55, "learning_rate": 4.14068100358423e-05, "loss": 6.5946, "step": 3162 }, { "epoch": 2.6, "learning_rate": 4.112903225806452e-05, "loss": 6.519, "step": 3224 }, { "epoch": 2.65, "learning_rate": 4.0851254480286736e-05, "loss": 6.4516, "step": 3286 }, { "epoch": 2.7, "learning_rate": 4.057347670250896e-05, "loss": 6.5272, "step": 3348 }, { "epoch": 2.75, "learning_rate": 4.029569892473119e-05, "loss": 6.5356, "step": 3410 }, { "epoch": 2.8, "learning_rate": 4.0017921146953405e-05, "loss": 6.5008, "step": 3472 }, { "epoch": 2.85, "learning_rate": 3.974014336917563e-05, "loss": 6.5357, "step": 3534 }, { "epoch": 2.9, "learning_rate": 3.94668458781362e-05, "loss": 6.5125, "step": 3596 }, { "epoch": 2.95, "learning_rate": 3.918906810035842e-05, "loss": 6.5182, "step": 3658 }, { "epoch": 3.0, "learning_rate": 3.891129032258065e-05, "loss": 6.4911, "step": 3720 }, { "epoch": 3.0, "eval_accuracy": 0.03951612903225806, "eval_f1_macro": 0.005772741775243502, "eval_f1_micro": 0.03951612903225806, "eval_f1_weighted": 0.01871310289711212, "eval_loss": 6.559847354888916, "eval_precision_macro": 0.005108167219150119, "eval_precision_micro": 0.03951612903225806, "eval_precision_weighted": 0.015116924332361547, "eval_recall_macro": 0.010902113654450544, "eval_recall_micro": 0.03951612903225806, "eval_recall_weighted": 0.03951612903225806, "eval_runtime": 5.604, "eval_samples_per_second": 442.538, "eval_steps_per_second": 27.659, "step": 3720 }, { "epoch": 3.05, "learning_rate": 3.863351254480287e-05, "loss": 6.111, "step": 3782 }, { "epoch": 3.1, "learning_rate": 3.835573476702509e-05, "loss": 6.2635, "step": 3844 }, { "epoch": 3.15, "learning_rate": 3.8077956989247316e-05, "loss": 6.1807, "step": 3906 }, { "epoch": 3.2, "learning_rate": 3.780017921146954e-05, "loss": 6.2969, "step": 3968 }, { "epoch": 3.25, "learning_rate": 3.752240143369175e-05, "loss": 6.1509, "step": 4030 }, { "epoch": 3.3, "learning_rate": 3.724462365591398e-05, "loss": 6.1196, "step": 4092 }, { "epoch": 3.35, "learning_rate": 3.69668458781362e-05, "loss": 6.1425, "step": 4154 }, { "epoch": 3.4, "learning_rate": 3.668906810035843e-05, "loss": 6.1487, "step": 4216 }, { "epoch": 3.45, "learning_rate": 3.641129032258065e-05, "loss": 6.1336, "step": 4278 }, { "epoch": 3.5, "learning_rate": 3.6133512544802866e-05, "loss": 6.1035, "step": 4340 }, { "epoch": 3.55, "learning_rate": 3.585573476702509e-05, "loss": 6.168, "step": 4402 }, { "epoch": 3.6, "learning_rate": 3.5577956989247316e-05, "loss": 6.283, "step": 4464 }, { "epoch": 3.65, "learning_rate": 3.5300179211469535e-05, "loss": 6.146, "step": 4526 }, { "epoch": 3.7, "learning_rate": 3.502240143369176e-05, "loss": 6.1099, "step": 4588 }, { "epoch": 3.75, "learning_rate": 3.4744623655913985e-05, "loss": 6.2278, "step": 4650 }, { "epoch": 3.8, "learning_rate": 3.4466845878136204e-05, "loss": 6.0957, "step": 4712 }, { "epoch": 3.85, "learning_rate": 3.418906810035842e-05, "loss": 6.0431, "step": 4774 }, { "epoch": 3.9, "learning_rate": 3.391129032258065e-05, "loss": 6.0954, "step": 4836 }, { "epoch": 3.95, "learning_rate": 3.363351254480287e-05, "loss": 6.1678, "step": 4898 }, { "epoch": 4.0, "learning_rate": 3.335573476702509e-05, "loss": 6.1257, "step": 4960 }, { "epoch": 4.0, "eval_accuracy": 0.04596774193548387, "eval_f1_macro": 0.007553613837545024, "eval_f1_micro": 0.04596774193548386, "eval_f1_weighted": 0.02342410713485415, "eval_loss": 6.5507588386535645, "eval_precision_macro": 0.0064405619945614515, "eval_precision_micro": 0.04596774193548387, "eval_precision_weighted": 0.020352886732684962, "eval_recall_macro": 0.017759772448305164, "eval_recall_micro": 0.04596774193548387, "eval_recall_weighted": 0.04596774193548387, "eval_runtime": 5.6061, "eval_samples_per_second": 442.376, "eval_steps_per_second": 27.649, "step": 4960 } ], "logging_steps": 62, "max_steps": 12400, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2638150224863232.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }