|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.999466666666667e-05, |
|
"loss": 2.308, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8997333333333335e-05, |
|
"loss": 1.6916, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.6755, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.806385040283203, |
|
"eval_f1": 0.6708054417489328, |
|
"eval_gpu_ram_allocated": 2.089780330657959, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 50, |
|
"eval_loss": 1.1062816381454468, |
|
"eval_precision": 0.690043017889279, |
|
"eval_recall": 0.6755, |
|
"eval_runtime": 2.3912, |
|
"eval_samples_per_second": 836.398, |
|
"eval_steps_per_second": 26.347, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.019077301025391, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.9694, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.7195, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 29.64177703857422, |
|
"eval_f1": 0.7181081417115642, |
|
"eval_gpu_ram_allocated": 2.0897774696350098, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 50, |
|
"eval_loss": 0.9585903286933899, |
|
"eval_precision": 0.719758443061289, |
|
"eval_recall": 0.7195, |
|
"eval_runtime": 2.3693, |
|
"eval_samples_per_second": 844.141, |
|
"eval_steps_per_second": 26.59, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.2536163330078125, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.6997333333333334e-05, |
|
"loss": 0.8509, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.712, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 29.641841888427734, |
|
"eval_f1": 0.7070168337920522, |
|
"eval_gpu_ram_allocated": 2.089791774749756, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 46, |
|
"eval_loss": 0.9747923016548157, |
|
"eval_precision": 0.7160570316458433, |
|
"eval_recall": 0.712, |
|
"eval_runtime": 2.4432, |
|
"eval_samples_per_second": 818.586, |
|
"eval_steps_per_second": 25.785, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.160213470458984, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.5994666666666668e-05, |
|
"loss": 0.7475, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.714, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 29.641963958740234, |
|
"eval_f1": 0.7122032912823338, |
|
"eval_gpu_ram_allocated": 2.089776039123535, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 50, |
|
"eval_loss": 0.9446640014648438, |
|
"eval_precision": 0.7148157467744413, |
|
"eval_recall": 0.714, |
|
"eval_runtime": 2.5063, |
|
"eval_samples_per_second": 798.001, |
|
"eval_steps_per_second": 25.137, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.160709381103516, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.4997333333333335e-05, |
|
"loss": 0.5841, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_accuracy": 0.711, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 29.642024993896484, |
|
"eval_f1": 0.7076606604060025, |
|
"eval_gpu_ram_allocated": 2.089787483215332, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 47, |
|
"eval_loss": 1.0064291954040527, |
|
"eval_precision": 0.7225290812411572, |
|
"eval_recall": 0.711, |
|
"eval_runtime": 2.4755, |
|
"eval_samples_per_second": 807.933, |
|
"eval_steps_per_second": 25.45, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.188880920410156, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.3994666666666668e-05, |
|
"loss": 0.4972, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_accuracy": 0.714, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 29.642135620117188, |
|
"eval_f1": 0.7109995031569997, |
|
"eval_gpu_ram_allocated": 2.089801788330078, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 47, |
|
"eval_loss": 1.0585097074508667, |
|
"eval_precision": 0.7129473752365556, |
|
"eval_recall": 0.714, |
|
"eval_runtime": 2.3843, |
|
"eval_samples_per_second": 838.824, |
|
"eval_steps_per_second": 26.423, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.176631927490234, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 1.2992e-05, |
|
"loss": 0.4555, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_accuracy": 0.7075, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 33.76519775390625, |
|
"eval_f1": 0.7086283787248422, |
|
"eval_gpu_ram_allocated": 2.089810371398926, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 46, |
|
"eval_loss": 1.117536187171936, |
|
"eval_precision": 0.71510102752271, |
|
"eval_recall": 0.7075, |
|
"eval_runtime": 2.5545, |
|
"eval_samples_per_second": 782.936, |
|
"eval_steps_per_second": 24.662, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.225734710693359, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 1.1989333333333336e-05, |
|
"loss": 0.3535, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.708, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 33.76530456542969, |
|
"eval_f1": 0.7032209621498534, |
|
"eval_gpu_ram_allocated": 2.0898032188415527, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 50, |
|
"eval_loss": 1.1748836040496826, |
|
"eval_precision": 0.7076659711678004, |
|
"eval_recall": 0.708, |
|
"eval_runtime": 2.3932, |
|
"eval_samples_per_second": 835.715, |
|
"eval_steps_per_second": 26.325, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.23016357421875, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.0986666666666668e-05, |
|
"loss": 0.2614, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_accuracy": 0.709, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 33.76542282104492, |
|
"eval_f1": 0.7056311006074188, |
|
"eval_gpu_ram_allocated": 2.089783191680908, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 49, |
|
"eval_loss": 1.2027860879898071, |
|
"eval_precision": 0.7079398723985221, |
|
"eval_recall": 0.709, |
|
"eval_runtime": 2.3888, |
|
"eval_samples_per_second": 837.234, |
|
"eval_steps_per_second": 26.373, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.237628936767578, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 9.984e-06, |
|
"loss": 0.2321, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"eval_accuracy": 0.698, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 33.7656135559082, |
|
"eval_f1": 0.7018556265437493, |
|
"eval_gpu_ram_allocated": 2.089846134185791, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 49, |
|
"eval_loss": 1.2960551977157593, |
|
"eval_precision": 0.708462957552084, |
|
"eval_recall": 0.698, |
|
"eval_runtime": 2.391, |
|
"eval_samples_per_second": 836.478, |
|
"eval_steps_per_second": 26.349, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.224781036376953, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.981333333333333e-06, |
|
"loss": 0.197, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"eval_accuracy": 0.712, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 33.7657470703125, |
|
"eval_f1": 0.7097931257647566, |
|
"eval_gpu_ram_allocated": 2.0897903442382812, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 45, |
|
"eval_loss": 1.3960117101669312, |
|
"eval_precision": 0.7137187449926237, |
|
"eval_recall": 0.712, |
|
"eval_runtime": 2.3878, |
|
"eval_samples_per_second": 837.604, |
|
"eval_steps_per_second": 26.385, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.219398498535156, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 7.978666666666667e-06, |
|
"loss": 0.1505, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_accuracy": 0.7075, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.827659606933594, |
|
"eval_f1": 0.709341703450241, |
|
"eval_gpu_ram_allocated": 2.0897817611694336, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 48, |
|
"eval_loss": 1.4310206174850464, |
|
"eval_precision": 0.7133423622104005, |
|
"eval_recall": 0.7075, |
|
"eval_runtime": 2.4471, |
|
"eval_samples_per_second": 817.29, |
|
"eval_steps_per_second": 25.745, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.2417755126953125, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 6.976000000000001e-06, |
|
"loss": 0.1132, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_accuracy": 0.7045, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.827754974365234, |
|
"eval_f1": 0.705265213679387, |
|
"eval_gpu_ram_allocated": 2.089801788330078, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 48, |
|
"eval_loss": 1.5454399585723877, |
|
"eval_precision": 0.7097494768850874, |
|
"eval_recall": 0.7045, |
|
"eval_runtime": 2.5035, |
|
"eval_samples_per_second": 798.873, |
|
"eval_steps_per_second": 25.165, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.293117523193359, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 5.973333333333334e-06, |
|
"loss": 0.0979, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_accuracy": 0.708, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.82805252075195, |
|
"eval_f1": 0.7090322597492875, |
|
"eval_gpu_ram_allocated": 2.089801788330078, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 45, |
|
"eval_loss": 1.64204740524292, |
|
"eval_precision": 0.7171054872018443, |
|
"eval_recall": 0.708, |
|
"eval_runtime": 2.5339, |
|
"eval_samples_per_second": 789.29, |
|
"eval_steps_per_second": 24.863, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.279300689697266, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 4.976e-06, |
|
"loss": 0.0818, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"eval_accuracy": 0.7065, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.828128814697266, |
|
"eval_f1": 0.706242034421972, |
|
"eval_gpu_ram_allocated": 2.0898447036743164, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 49, |
|
"eval_loss": 1.686875820159912, |
|
"eval_precision": 0.7102028476355108, |
|
"eval_recall": 0.7065, |
|
"eval_runtime": 2.4408, |
|
"eval_samples_per_second": 819.396, |
|
"eval_steps_per_second": 25.811, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.2822418212890625, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.973333333333333e-06, |
|
"loss": 0.062, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"eval_accuracy": 0.701, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.82817459106445, |
|
"eval_f1": 0.704316965060789, |
|
"eval_gpu_ram_allocated": 2.0900821685791016, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 50, |
|
"eval_loss": 1.781833291053772, |
|
"eval_precision": 0.7122852239266858, |
|
"eval_recall": 0.701, |
|
"eval_runtime": 2.3858, |
|
"eval_samples_per_second": 838.291, |
|
"eval_steps_per_second": 26.406, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.286445617675781, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 2.970666666666667e-06, |
|
"loss": 0.0433, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"eval_accuracy": 0.707, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.82823181152344, |
|
"eval_f1": 0.70799964440731, |
|
"eval_gpu_ram_allocated": 2.089794635772705, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 49, |
|
"eval_loss": 1.7981120347976685, |
|
"eval_precision": 0.7109713384315803, |
|
"eval_recall": 0.707, |
|
"eval_runtime": 2.4151, |
|
"eval_samples_per_second": 828.137, |
|
"eval_steps_per_second": 26.086, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.266563415527344, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 1.968e-06, |
|
"loss": 0.0368, |
|
"step": 3384 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_accuracy": 0.7055, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.828514099121094, |
|
"eval_f1": 0.7079190260942086, |
|
"eval_gpu_ram_allocated": 2.0898475646972656, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 47, |
|
"eval_loss": 1.8403420448303223, |
|
"eval_precision": 0.7131395828448935, |
|
"eval_recall": 0.7055, |
|
"eval_runtime": 2.4685, |
|
"eval_samples_per_second": 810.214, |
|
"eval_steps_per_second": 25.522, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.278324127197266, |
|
"step": 3384 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 9.653333333333333e-07, |
|
"loss": 0.0379, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_accuracy": 0.705, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 35.828582763671875, |
|
"eval_f1": 0.7051869329304575, |
|
"eval_gpu_ram_allocated": 2.089784622192383, |
|
"eval_gpu_ram_cached": 25.85546875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 47, |
|
"eval_loss": 1.8535802364349365, |
|
"eval_precision": 0.7073671527926624, |
|
"eval_recall": 0.705, |
|
"eval_runtime": 2.4945, |
|
"eval_samples_per_second": 801.76, |
|
"eval_steps_per_second": 25.255, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.301258087158203, |
|
"step": 3572 |
|
} |
|
], |
|
"max_steps": 3750, |
|
"num_train_epochs": 15, |
|
"total_flos": 7220464762017408.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|