{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.999466666666667e-05, "loss": 2.308, "step": 1 }, { "epoch": 0.75, "learning_rate": 1.8997333333333335e-05, "loss": 1.6916, "step": 188 }, { "epoch": 0.75, "eval_accuracy": 0.6755, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.806385040283203, "eval_f1": 0.6708054417489328, "eval_gpu_ram_allocated": 2.089780330657959, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 50, "eval_loss": 1.1062816381454468, "eval_precision": 0.690043017889279, "eval_recall": 0.6755, "eval_runtime": 2.3912, "eval_samples_per_second": 836.398, "eval_steps_per_second": 26.347, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.019077301025391, "step": 188 }, { "epoch": 1.5, "learning_rate": 1.8e-05, "loss": 0.9694, "step": 376 }, { "epoch": 1.5, "eval_accuracy": 0.7195, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 29.64177703857422, "eval_f1": 0.7181081417115642, "eval_gpu_ram_allocated": 2.0897774696350098, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 50, "eval_loss": 0.9585903286933899, "eval_precision": 0.719758443061289, "eval_recall": 0.7195, "eval_runtime": 2.3693, "eval_samples_per_second": 844.141, "eval_steps_per_second": 26.59, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.2536163330078125, "step": 376 }, { "epoch": 2.26, "learning_rate": 1.6997333333333334e-05, "loss": 0.8509, "step": 564 }, { "epoch": 2.26, "eval_accuracy": 0.712, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 29.641841888427734, "eval_f1": 0.7070168337920522, "eval_gpu_ram_allocated": 2.089791774749756, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 46, "eval_loss": 0.9747923016548157, "eval_precision": 0.7160570316458433, "eval_recall": 0.712, "eval_runtime": 2.4432, "eval_samples_per_second": 818.586, "eval_steps_per_second": 25.785, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.160213470458984, "step": 564 }, { "epoch": 3.01, "learning_rate": 1.5994666666666668e-05, "loss": 0.7475, "step": 752 }, { "epoch": 3.01, "eval_accuracy": 0.714, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 29.641963958740234, "eval_f1": 0.7122032912823338, "eval_gpu_ram_allocated": 2.089776039123535, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 50, "eval_loss": 0.9446640014648438, "eval_precision": 0.7148157467744413, "eval_recall": 0.714, "eval_runtime": 2.5063, "eval_samples_per_second": 798.001, "eval_steps_per_second": 25.137, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.160709381103516, "step": 752 }, { "epoch": 3.76, "learning_rate": 1.4997333333333335e-05, "loss": 0.5841, "step": 940 }, { "epoch": 3.76, "eval_accuracy": 0.711, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 29.642024993896484, "eval_f1": 0.7076606604060025, "eval_gpu_ram_allocated": 2.089787483215332, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 47, "eval_loss": 1.0064291954040527, "eval_precision": 0.7225290812411572, "eval_recall": 0.711, "eval_runtime": 2.4755, "eval_samples_per_second": 807.933, "eval_steps_per_second": 25.45, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.188880920410156, "step": 940 }, { "epoch": 4.51, "learning_rate": 1.3994666666666668e-05, "loss": 0.4972, "step": 1128 }, { "epoch": 4.51, "eval_accuracy": 0.714, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 29.642135620117188, "eval_f1": 0.7109995031569997, "eval_gpu_ram_allocated": 2.089801788330078, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 47, "eval_loss": 1.0585097074508667, "eval_precision": 0.7129473752365556, "eval_recall": 0.714, "eval_runtime": 2.3843, "eval_samples_per_second": 838.824, "eval_steps_per_second": 26.423, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.176631927490234, "step": 1128 }, { "epoch": 5.26, "learning_rate": 1.2992e-05, "loss": 0.4555, "step": 1316 }, { "epoch": 5.26, "eval_accuracy": 0.7075, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 33.76519775390625, "eval_f1": 0.7086283787248422, "eval_gpu_ram_allocated": 2.089810371398926, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 46, "eval_loss": 1.117536187171936, "eval_precision": 0.71510102752271, "eval_recall": 0.7075, "eval_runtime": 2.5545, "eval_samples_per_second": 782.936, "eval_steps_per_second": 24.662, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.225734710693359, "step": 1316 }, { "epoch": 6.02, "learning_rate": 1.1989333333333336e-05, "loss": 0.3535, "step": 1504 }, { "epoch": 6.02, "eval_accuracy": 0.708, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 33.76530456542969, "eval_f1": 0.7032209621498534, "eval_gpu_ram_allocated": 2.0898032188415527, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 50, "eval_loss": 1.1748836040496826, "eval_precision": 0.7076659711678004, "eval_recall": 0.708, "eval_runtime": 2.3932, "eval_samples_per_second": 835.715, "eval_steps_per_second": 26.325, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.23016357421875, "step": 1504 }, { "epoch": 6.77, "learning_rate": 1.0986666666666668e-05, "loss": 0.2614, "step": 1692 }, { "epoch": 6.77, "eval_accuracy": 0.709, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 33.76542282104492, "eval_f1": 0.7056311006074188, "eval_gpu_ram_allocated": 2.089783191680908, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 49, "eval_loss": 1.2027860879898071, "eval_precision": 0.7079398723985221, "eval_recall": 0.709, "eval_runtime": 2.3888, "eval_samples_per_second": 837.234, "eval_steps_per_second": 26.373, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.237628936767578, "step": 1692 }, { "epoch": 7.52, "learning_rate": 9.984e-06, "loss": 0.2321, "step": 1880 }, { "epoch": 7.52, "eval_accuracy": 0.698, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 33.7656135559082, "eval_f1": 0.7018556265437493, "eval_gpu_ram_allocated": 2.089846134185791, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 49, "eval_loss": 1.2960551977157593, "eval_precision": 0.708462957552084, "eval_recall": 0.698, "eval_runtime": 2.391, "eval_samples_per_second": 836.478, "eval_steps_per_second": 26.349, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.224781036376953, "step": 1880 }, { "epoch": 8.27, "learning_rate": 8.981333333333333e-06, "loss": 0.197, "step": 2068 }, { "epoch": 8.27, "eval_accuracy": 0.712, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 33.7657470703125, "eval_f1": 0.7097931257647566, "eval_gpu_ram_allocated": 2.0897903442382812, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 45, "eval_loss": 1.3960117101669312, "eval_precision": 0.7137187449926237, "eval_recall": 0.712, "eval_runtime": 2.3878, "eval_samples_per_second": 837.604, "eval_steps_per_second": 26.385, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.219398498535156, "step": 2068 }, { "epoch": 9.02, "learning_rate": 7.978666666666667e-06, "loss": 0.1505, "step": 2256 }, { "epoch": 9.02, "eval_accuracy": 0.7075, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.827659606933594, "eval_f1": 0.709341703450241, "eval_gpu_ram_allocated": 2.0897817611694336, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 48, "eval_loss": 1.4310206174850464, "eval_precision": 0.7133423622104005, "eval_recall": 0.7075, "eval_runtime": 2.4471, "eval_samples_per_second": 817.29, "eval_steps_per_second": 25.745, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.2417755126953125, "step": 2256 }, { "epoch": 9.78, "learning_rate": 6.976000000000001e-06, "loss": 0.1132, "step": 2444 }, { "epoch": 9.78, "eval_accuracy": 0.7045, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.827754974365234, "eval_f1": 0.705265213679387, "eval_gpu_ram_allocated": 2.089801788330078, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 48, "eval_loss": 1.5454399585723877, "eval_precision": 0.7097494768850874, "eval_recall": 0.7045, "eval_runtime": 2.5035, "eval_samples_per_second": 798.873, "eval_steps_per_second": 25.165, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.293117523193359, "step": 2444 }, { "epoch": 10.53, "learning_rate": 5.973333333333334e-06, "loss": 0.0979, "step": 2632 }, { "epoch": 10.53, "eval_accuracy": 0.708, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.82805252075195, "eval_f1": 0.7090322597492875, "eval_gpu_ram_allocated": 2.089801788330078, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 45, "eval_loss": 1.64204740524292, "eval_precision": 0.7171054872018443, "eval_recall": 0.708, "eval_runtime": 2.5339, "eval_samples_per_second": 789.29, "eval_steps_per_second": 24.863, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.279300689697266, "step": 2632 }, { "epoch": 11.28, "learning_rate": 4.976e-06, "loss": 0.0818, "step": 2820 }, { "epoch": 11.28, "eval_accuracy": 0.7065, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.828128814697266, "eval_f1": 0.706242034421972, "eval_gpu_ram_allocated": 2.0898447036743164, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 49, "eval_loss": 1.686875820159912, "eval_precision": 0.7102028476355108, "eval_recall": 0.7065, "eval_runtime": 2.4408, "eval_samples_per_second": 819.396, "eval_steps_per_second": 25.811, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.2822418212890625, "step": 2820 }, { "epoch": 12.03, "learning_rate": 3.973333333333333e-06, "loss": 0.062, "step": 3008 }, { "epoch": 12.03, "eval_accuracy": 0.701, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.82817459106445, "eval_f1": 0.704316965060789, "eval_gpu_ram_allocated": 2.0900821685791016, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 50, "eval_loss": 1.781833291053772, "eval_precision": 0.7122852239266858, "eval_recall": 0.701, "eval_runtime": 2.3858, "eval_samples_per_second": 838.291, "eval_steps_per_second": 26.406, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.286445617675781, "step": 3008 }, { "epoch": 12.78, "learning_rate": 2.970666666666667e-06, "loss": 0.0433, "step": 3196 }, { "epoch": 12.78, "eval_accuracy": 0.707, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.82823181152344, "eval_f1": 0.70799964440731, "eval_gpu_ram_allocated": 2.089794635772705, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 49, "eval_loss": 1.7981120347976685, "eval_precision": 0.7109713384315803, "eval_recall": 0.707, "eval_runtime": 2.4151, "eval_samples_per_second": 828.137, "eval_steps_per_second": 26.086, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.266563415527344, "step": 3196 }, { "epoch": 13.54, "learning_rate": 1.968e-06, "loss": 0.0368, "step": 3384 }, { "epoch": 13.54, "eval_accuracy": 0.7055, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.828514099121094, "eval_f1": 0.7079190260942086, "eval_gpu_ram_allocated": 2.0898475646972656, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 47, "eval_loss": 1.8403420448303223, "eval_precision": 0.7131395828448935, "eval_recall": 0.7055, "eval_runtime": 2.4685, "eval_samples_per_second": 810.214, "eval_steps_per_second": 25.522, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.278324127197266, "step": 3384 }, { "epoch": 14.29, "learning_rate": 9.653333333333333e-07, "loss": 0.0379, "step": 3572 }, { "epoch": 14.29, "eval_accuracy": 0.705, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 35.828582763671875, "eval_f1": 0.7051869329304575, "eval_gpu_ram_allocated": 2.089784622192383, "eval_gpu_ram_cached": 25.85546875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 47, "eval_loss": 1.8535802364349365, "eval_precision": 0.7073671527926624, "eval_recall": 0.705, "eval_runtime": 2.4945, "eval_samples_per_second": 801.76, "eval_steps_per_second": 25.255, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.301258087158203, "step": 3572 } ], "max_steps": 3750, "num_train_epochs": 15, "total_flos": 7220464762017408.0, "trial_name": null, "trial_params": null }