|
{ |
|
"best_metric": 0.9796296296296296, |
|
"best_model_checkpoint": "bottom-layers-finetuned-eurosat/checkpoint-504", |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 500, |
|
"global_step": 504, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 2.2965, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 2.2868, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 2.2711, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9607843137254903e-05, |
|
"loss": 2.239, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.4509803921568626e-05, |
|
"loss": 2.1905, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 2.1375, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.431372549019608e-05, |
|
"loss": 2.0589, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9215686274509805e-05, |
|
"loss": 1.9787, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 1.8736, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.901960784313725e-05, |
|
"loss": 1.7237, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.955849889624724e-05, |
|
"loss": 1.6018, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.900662251655629e-05, |
|
"loss": 1.4656, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.845474613686534e-05, |
|
"loss": 1.3407, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7902869757174393e-05, |
|
"loss": 1.2151, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.735099337748345e-05, |
|
"loss": 1.1103, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.67991169977925e-05, |
|
"loss": 0.9809, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.624724061810155e-05, |
|
"loss": 0.9362, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.56953642384106e-05, |
|
"loss": 0.8689, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.514348785871965e-05, |
|
"loss": 0.8, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.45916114790287e-05, |
|
"loss": 0.7548, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.403973509933775e-05, |
|
"loss": 0.7139, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.34878587196468e-05, |
|
"loss": 0.6564, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.293598233995585e-05, |
|
"loss": 0.629, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.23841059602649e-05, |
|
"loss": 0.5827, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1832229580573955e-05, |
|
"loss": 0.5532, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1280353200883e-05, |
|
"loss": 0.529, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0728476821192055e-05, |
|
"loss": 0.5351, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.017660044150111e-05, |
|
"loss": 0.5099, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.962472406181016e-05, |
|
"loss": 0.4486, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.907284768211921e-05, |
|
"loss": 0.4603, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.852097130242826e-05, |
|
"loss": 0.4566, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796909492273731e-05, |
|
"loss": 0.4401, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.741721854304636e-05, |
|
"loss": 0.4395, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9648148148148148, |
|
"eval_loss": 0.3520647883415222, |
|
"eval_runtime": 68.6805, |
|
"eval_samples_per_second": 78.625, |
|
"eval_steps_per_second": 2.461, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.686534216335541e-05, |
|
"loss": 0.423, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.631346578366446e-05, |
|
"loss": 0.3978, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.576158940397351e-05, |
|
"loss": 0.3975, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.520971302428256e-05, |
|
"loss": 0.3968, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.465783664459161e-05, |
|
"loss": 0.3974, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.410596026490066e-05, |
|
"loss": 0.3757, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3554083885209716e-05, |
|
"loss": 0.3432, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.300220750551877e-05, |
|
"loss": 0.3592, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2450331125827816e-05, |
|
"loss": 0.3528, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.189845474613687e-05, |
|
"loss": 0.3405, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.134657836644592e-05, |
|
"loss": 0.3274, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.079470198675497e-05, |
|
"loss": 0.3841, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0242825607064017e-05, |
|
"loss": 0.3571, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.969094922737307e-05, |
|
"loss": 0.3171, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.913907284768212e-05, |
|
"loss": 0.3445, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8587196467991174e-05, |
|
"loss": 0.3717, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.803532008830022e-05, |
|
"loss": 0.3768, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.7483443708609274e-05, |
|
"loss": 0.3281, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.693156732891832e-05, |
|
"loss": 0.3437, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6379690949227374e-05, |
|
"loss": 0.3113, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5827814569536424e-05, |
|
"loss": 0.3119, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5275938189845478e-05, |
|
"loss": 0.2737, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4724061810154528e-05, |
|
"loss": 0.3649, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4172185430463578e-05, |
|
"loss": 0.328, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3620309050772628e-05, |
|
"loss": 0.3084, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.306843267108168e-05, |
|
"loss": 0.3011, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2516556291390732e-05, |
|
"loss": 0.3178, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.1964679911699782e-05, |
|
"loss": 0.2653, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1412803532008832e-05, |
|
"loss": 0.3, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0860927152317882e-05, |
|
"loss": 0.3085, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0309050772626932e-05, |
|
"loss": 0.2785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9757174392935986e-05, |
|
"loss": 0.2812, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9205298013245036e-05, |
|
"loss": 0.2709, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8653421633554086e-05, |
|
"loss": 0.3093, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9764814814814815, |
|
"eval_loss": 0.216535285115242, |
|
"eval_runtime": 69.25, |
|
"eval_samples_per_second": 77.978, |
|
"eval_steps_per_second": 2.44, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8101545253863136e-05, |
|
"loss": 0.2985, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7549668874172186e-05, |
|
"loss": 0.2863, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.699779249448124e-05, |
|
"loss": 0.2912, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.644591611479029e-05, |
|
"loss": 0.2363, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.589403973509934e-05, |
|
"loss": 0.2927, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.534216335540839e-05, |
|
"loss": 0.2855, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.479028697571744e-05, |
|
"loss": 0.2891, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4238410596026492e-05, |
|
"loss": 0.2918, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3686534216335542e-05, |
|
"loss": 0.2656, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3134657836644593e-05, |
|
"loss": 0.2515, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2582781456953644e-05, |
|
"loss": 0.2983, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2030905077262694e-05, |
|
"loss": 0.2691, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1479028697571745e-05, |
|
"loss": 0.2863, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0927152317880796e-05, |
|
"loss": 0.2205, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0375275938189846e-05, |
|
"loss": 0.261, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.823399558498897e-06, |
|
"loss": 0.2179, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.271523178807948e-06, |
|
"loss": 0.2446, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.719646799116998e-06, |
|
"loss": 0.2695, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.16777041942605e-06, |
|
"loss": 0.2919, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.6158940397350995e-06, |
|
"loss": 0.2925, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.0640176600441505e-06, |
|
"loss": 0.2524, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.512141280353201e-06, |
|
"loss": 0.2584, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.9602649006622515e-06, |
|
"loss": 0.2678, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.4083885209713025e-06, |
|
"loss": 0.2625, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.856512141280353e-06, |
|
"loss": 0.2577, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.304635761589404e-06, |
|
"loss": 0.2243, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.752759381898455e-06, |
|
"loss": 0.267, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.2008830022075054e-06, |
|
"loss": 0.2599, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6490066225165563e-06, |
|
"loss": 0.2431, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0971302428256073e-06, |
|
"loss": 0.2688, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.545253863134658e-06, |
|
"loss": 0.2516, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 9.933774834437087e-07, |
|
"loss": 0.2653, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.415011037527594e-07, |
|
"loss": 0.2801, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9796296296296296, |
|
"eval_loss": 0.1896590143442154, |
|
"eval_runtime": 68.8802, |
|
"eval_samples_per_second": 78.397, |
|
"eval_steps_per_second": 2.454, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"step": 504, |
|
"total_flos": 4.999521681367106e+18, |
|
"train_loss": 0.5908920062439782, |
|
"train_runtime": 1706.6198, |
|
"train_samples_per_second": 37.97, |
|
"train_steps_per_second": 0.295 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 504, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 4.999521681367106e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|