diogopaes10's picture
End of training
66fe6a4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 3750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.999466666666667e-05,
"loss": 2.308,
"step": 1
},
{
"epoch": 0.75,
"learning_rate": 1.8997333333333335e-05,
"loss": 1.6916,
"step": 188
},
{
"epoch": 0.75,
"eval_accuracy": 0.6755,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.806385040283203,
"eval_f1": 0.6708054417489328,
"eval_gpu_ram_allocated": 2.089780330657959,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 50,
"eval_loss": 1.1062816381454468,
"eval_precision": 0.690043017889279,
"eval_recall": 0.6755,
"eval_runtime": 2.3912,
"eval_samples_per_second": 836.398,
"eval_steps_per_second": 26.347,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.019077301025391,
"step": 188
},
{
"epoch": 1.5,
"learning_rate": 1.8e-05,
"loss": 0.9694,
"step": 376
},
{
"epoch": 1.5,
"eval_accuracy": 0.7195,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 29.64177703857422,
"eval_f1": 0.7181081417115642,
"eval_gpu_ram_allocated": 2.0897774696350098,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 50,
"eval_loss": 0.9585903286933899,
"eval_precision": 0.719758443061289,
"eval_recall": 0.7195,
"eval_runtime": 2.3693,
"eval_samples_per_second": 844.141,
"eval_steps_per_second": 26.59,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.2536163330078125,
"step": 376
},
{
"epoch": 2.26,
"learning_rate": 1.6997333333333334e-05,
"loss": 0.8509,
"step": 564
},
{
"epoch": 2.26,
"eval_accuracy": 0.712,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 29.641841888427734,
"eval_f1": 0.7070168337920522,
"eval_gpu_ram_allocated": 2.089791774749756,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 46,
"eval_loss": 0.9747923016548157,
"eval_precision": 0.7160570316458433,
"eval_recall": 0.712,
"eval_runtime": 2.4432,
"eval_samples_per_second": 818.586,
"eval_steps_per_second": 25.785,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.160213470458984,
"step": 564
},
{
"epoch": 3.01,
"learning_rate": 1.5994666666666668e-05,
"loss": 0.7475,
"step": 752
},
{
"epoch": 3.01,
"eval_accuracy": 0.714,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 29.641963958740234,
"eval_f1": 0.7122032912823338,
"eval_gpu_ram_allocated": 2.089776039123535,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 50,
"eval_loss": 0.9446640014648438,
"eval_precision": 0.7148157467744413,
"eval_recall": 0.714,
"eval_runtime": 2.5063,
"eval_samples_per_second": 798.001,
"eval_steps_per_second": 25.137,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.160709381103516,
"step": 752
},
{
"epoch": 3.76,
"learning_rate": 1.4997333333333335e-05,
"loss": 0.5841,
"step": 940
},
{
"epoch": 3.76,
"eval_accuracy": 0.711,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 29.642024993896484,
"eval_f1": 0.7076606604060025,
"eval_gpu_ram_allocated": 2.089787483215332,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 47,
"eval_loss": 1.0064291954040527,
"eval_precision": 0.7225290812411572,
"eval_recall": 0.711,
"eval_runtime": 2.4755,
"eval_samples_per_second": 807.933,
"eval_steps_per_second": 25.45,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.188880920410156,
"step": 940
},
{
"epoch": 4.51,
"learning_rate": 1.3994666666666668e-05,
"loss": 0.4972,
"step": 1128
},
{
"epoch": 4.51,
"eval_accuracy": 0.714,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 29.642135620117188,
"eval_f1": 0.7109995031569997,
"eval_gpu_ram_allocated": 2.089801788330078,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 47,
"eval_loss": 1.0585097074508667,
"eval_precision": 0.7129473752365556,
"eval_recall": 0.714,
"eval_runtime": 2.3843,
"eval_samples_per_second": 838.824,
"eval_steps_per_second": 26.423,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.176631927490234,
"step": 1128
},
{
"epoch": 5.26,
"learning_rate": 1.2992e-05,
"loss": 0.4555,
"step": 1316
},
{
"epoch": 5.26,
"eval_accuracy": 0.7075,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 33.76519775390625,
"eval_f1": 0.7086283787248422,
"eval_gpu_ram_allocated": 2.089810371398926,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 46,
"eval_loss": 1.117536187171936,
"eval_precision": 0.71510102752271,
"eval_recall": 0.7075,
"eval_runtime": 2.5545,
"eval_samples_per_second": 782.936,
"eval_steps_per_second": 24.662,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.225734710693359,
"step": 1316
},
{
"epoch": 6.02,
"learning_rate": 1.1989333333333336e-05,
"loss": 0.3535,
"step": 1504
},
{
"epoch": 6.02,
"eval_accuracy": 0.708,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 33.76530456542969,
"eval_f1": 0.7032209621498534,
"eval_gpu_ram_allocated": 2.0898032188415527,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 50,
"eval_loss": 1.1748836040496826,
"eval_precision": 0.7076659711678004,
"eval_recall": 0.708,
"eval_runtime": 2.3932,
"eval_samples_per_second": 835.715,
"eval_steps_per_second": 26.325,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.23016357421875,
"step": 1504
},
{
"epoch": 6.77,
"learning_rate": 1.0986666666666668e-05,
"loss": 0.2614,
"step": 1692
},
{
"epoch": 6.77,
"eval_accuracy": 0.709,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 33.76542282104492,
"eval_f1": 0.7056311006074188,
"eval_gpu_ram_allocated": 2.089783191680908,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 49,
"eval_loss": 1.2027860879898071,
"eval_precision": 0.7079398723985221,
"eval_recall": 0.709,
"eval_runtime": 2.3888,
"eval_samples_per_second": 837.234,
"eval_steps_per_second": 26.373,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.237628936767578,
"step": 1692
},
{
"epoch": 7.52,
"learning_rate": 9.984e-06,
"loss": 0.2321,
"step": 1880
},
{
"epoch": 7.52,
"eval_accuracy": 0.698,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 33.7656135559082,
"eval_f1": 0.7018556265437493,
"eval_gpu_ram_allocated": 2.089846134185791,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 49,
"eval_loss": 1.2960551977157593,
"eval_precision": 0.708462957552084,
"eval_recall": 0.698,
"eval_runtime": 2.391,
"eval_samples_per_second": 836.478,
"eval_steps_per_second": 26.349,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.224781036376953,
"step": 1880
},
{
"epoch": 8.27,
"learning_rate": 8.981333333333333e-06,
"loss": 0.197,
"step": 2068
},
{
"epoch": 8.27,
"eval_accuracy": 0.712,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 33.7657470703125,
"eval_f1": 0.7097931257647566,
"eval_gpu_ram_allocated": 2.0897903442382812,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 45,
"eval_loss": 1.3960117101669312,
"eval_precision": 0.7137187449926237,
"eval_recall": 0.712,
"eval_runtime": 2.3878,
"eval_samples_per_second": 837.604,
"eval_steps_per_second": 26.385,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.219398498535156,
"step": 2068
},
{
"epoch": 9.02,
"learning_rate": 7.978666666666667e-06,
"loss": 0.1505,
"step": 2256
},
{
"epoch": 9.02,
"eval_accuracy": 0.7075,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.827659606933594,
"eval_f1": 0.709341703450241,
"eval_gpu_ram_allocated": 2.0897817611694336,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 48,
"eval_loss": 1.4310206174850464,
"eval_precision": 0.7133423622104005,
"eval_recall": 0.7075,
"eval_runtime": 2.4471,
"eval_samples_per_second": 817.29,
"eval_steps_per_second": 25.745,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.2417755126953125,
"step": 2256
},
{
"epoch": 9.78,
"learning_rate": 6.976000000000001e-06,
"loss": 0.1132,
"step": 2444
},
{
"epoch": 9.78,
"eval_accuracy": 0.7045,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.827754974365234,
"eval_f1": 0.705265213679387,
"eval_gpu_ram_allocated": 2.089801788330078,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 48,
"eval_loss": 1.5454399585723877,
"eval_precision": 0.7097494768850874,
"eval_recall": 0.7045,
"eval_runtime": 2.5035,
"eval_samples_per_second": 798.873,
"eval_steps_per_second": 25.165,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.293117523193359,
"step": 2444
},
{
"epoch": 10.53,
"learning_rate": 5.973333333333334e-06,
"loss": 0.0979,
"step": 2632
},
{
"epoch": 10.53,
"eval_accuracy": 0.708,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.82805252075195,
"eval_f1": 0.7090322597492875,
"eval_gpu_ram_allocated": 2.089801788330078,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 45,
"eval_loss": 1.64204740524292,
"eval_precision": 0.7171054872018443,
"eval_recall": 0.708,
"eval_runtime": 2.5339,
"eval_samples_per_second": 789.29,
"eval_steps_per_second": 24.863,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.279300689697266,
"step": 2632
},
{
"epoch": 11.28,
"learning_rate": 4.976e-06,
"loss": 0.0818,
"step": 2820
},
{
"epoch": 11.28,
"eval_accuracy": 0.7065,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.828128814697266,
"eval_f1": 0.706242034421972,
"eval_gpu_ram_allocated": 2.0898447036743164,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 49,
"eval_loss": 1.686875820159912,
"eval_precision": 0.7102028476355108,
"eval_recall": 0.7065,
"eval_runtime": 2.4408,
"eval_samples_per_second": 819.396,
"eval_steps_per_second": 25.811,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.2822418212890625,
"step": 2820
},
{
"epoch": 12.03,
"learning_rate": 3.973333333333333e-06,
"loss": 0.062,
"step": 3008
},
{
"epoch": 12.03,
"eval_accuracy": 0.701,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.82817459106445,
"eval_f1": 0.704316965060789,
"eval_gpu_ram_allocated": 2.0900821685791016,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 50,
"eval_loss": 1.781833291053772,
"eval_precision": 0.7122852239266858,
"eval_recall": 0.701,
"eval_runtime": 2.3858,
"eval_samples_per_second": 838.291,
"eval_steps_per_second": 26.406,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.286445617675781,
"step": 3008
},
{
"epoch": 12.78,
"learning_rate": 2.970666666666667e-06,
"loss": 0.0433,
"step": 3196
},
{
"epoch": 12.78,
"eval_accuracy": 0.707,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.82823181152344,
"eval_f1": 0.70799964440731,
"eval_gpu_ram_allocated": 2.089794635772705,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 49,
"eval_loss": 1.7981120347976685,
"eval_precision": 0.7109713384315803,
"eval_recall": 0.707,
"eval_runtime": 2.4151,
"eval_samples_per_second": 828.137,
"eval_steps_per_second": 26.086,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.266563415527344,
"step": 3196
},
{
"epoch": 13.54,
"learning_rate": 1.968e-06,
"loss": 0.0368,
"step": 3384
},
{
"epoch": 13.54,
"eval_accuracy": 0.7055,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.828514099121094,
"eval_f1": 0.7079190260942086,
"eval_gpu_ram_allocated": 2.0898475646972656,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 47,
"eval_loss": 1.8403420448303223,
"eval_precision": 0.7131395828448935,
"eval_recall": 0.7055,
"eval_runtime": 2.4685,
"eval_samples_per_second": 810.214,
"eval_steps_per_second": 25.522,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.278324127197266,
"step": 3384
},
{
"epoch": 14.29,
"learning_rate": 9.653333333333333e-07,
"loss": 0.0379,
"step": 3572
},
{
"epoch": 14.29,
"eval_accuracy": 0.705,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 35.828582763671875,
"eval_f1": 0.7051869329304575,
"eval_gpu_ram_allocated": 2.089784622192383,
"eval_gpu_ram_cached": 25.85546875,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 47,
"eval_loss": 1.8535802364349365,
"eval_precision": 0.7073671527926624,
"eval_recall": 0.705,
"eval_runtime": 2.4945,
"eval_samples_per_second": 801.76,
"eval_steps_per_second": 25.255,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.301258087158203,
"step": 3572
}
],
"max_steps": 3750,
"num_train_epochs": 15,
"total_flos": 7220464762017408.0,
"trial_name": null,
"trial_params": null
}