vit-base-beans-demo-v5 / trainer_state.json
rshrott's picture
Training in progress, step 100
cff8a0e verified
raw
history blame
9.52 kB
{
"best_metric": 0.7924718260765076,
"best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-200",
"epoch": 4.0,
"eval_steps": 100,
"global_step": 496,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 2.0731146335601807,
"learning_rate": 0.00019596774193548388,
"loss": 1.5546,
"step": 10
},
{
"epoch": 0.16,
"grad_norm": 1.9955084323883057,
"learning_rate": 0.00019193548387096775,
"loss": 1.3431,
"step": 20
},
{
"epoch": 0.24,
"grad_norm": 1.4386166334152222,
"learning_rate": 0.00018790322580645164,
"loss": 1.1702,
"step": 30
},
{
"epoch": 0.32,
"grad_norm": 2.1243135929107666,
"learning_rate": 0.00018387096774193548,
"loss": 1.1703,
"step": 40
},
{
"epoch": 0.4,
"grad_norm": 1.4760087728500366,
"learning_rate": 0.00017983870967741935,
"loss": 1.0635,
"step": 50
},
{
"epoch": 0.48,
"grad_norm": 1.3573176860809326,
"learning_rate": 0.00017580645161290325,
"loss": 1.0524,
"step": 60
},
{
"epoch": 0.56,
"grad_norm": 3.1192474365234375,
"learning_rate": 0.00017177419354838711,
"loss": 1.0872,
"step": 70
},
{
"epoch": 0.65,
"grad_norm": 2.4728469848632812,
"learning_rate": 0.00016774193548387098,
"loss": 0.87,
"step": 80
},
{
"epoch": 0.73,
"grad_norm": 3.819819688796997,
"learning_rate": 0.00016370967741935485,
"loss": 1.2173,
"step": 90
},
{
"epoch": 0.81,
"grad_norm": 1.585181713104248,
"learning_rate": 0.00015967741935483872,
"loss": 1.1711,
"step": 100
},
{
"epoch": 0.81,
"eval_accuracy": 0.5981735159817352,
"eval_loss": 1.0254943370819092,
"eval_runtime": 7.7395,
"eval_samples_per_second": 28.296,
"eval_steps_per_second": 3.618,
"step": 100
},
{
"epoch": 0.89,
"grad_norm": 1.5606015920639038,
"learning_rate": 0.0001556451612903226,
"loss": 0.9609,
"step": 110
},
{
"epoch": 0.97,
"grad_norm": 2.0005290508270264,
"learning_rate": 0.00015161290322580646,
"loss": 0.8673,
"step": 120
},
{
"epoch": 1.05,
"grad_norm": 1.7934025526046753,
"learning_rate": 0.00014758064516129032,
"loss": 0.7807,
"step": 130
},
{
"epoch": 1.13,
"grad_norm": 2.0995261669158936,
"learning_rate": 0.00014354838709677422,
"loss": 0.7211,
"step": 140
},
{
"epoch": 1.21,
"grad_norm": 1.2384763956069946,
"learning_rate": 0.0001395161290322581,
"loss": 0.8051,
"step": 150
},
{
"epoch": 1.29,
"grad_norm": 3.001422643661499,
"learning_rate": 0.00013548387096774193,
"loss": 0.9052,
"step": 160
},
{
"epoch": 1.37,
"grad_norm": 1.8868207931518555,
"learning_rate": 0.0001314516129032258,
"loss": 0.7217,
"step": 170
},
{
"epoch": 1.45,
"grad_norm": 1.7666105031967163,
"learning_rate": 0.0001274193548387097,
"loss": 0.728,
"step": 180
},
{
"epoch": 1.53,
"grad_norm": 2.378178358078003,
"learning_rate": 0.00012338709677419356,
"loss": 0.7242,
"step": 190
},
{
"epoch": 1.61,
"grad_norm": 2.4882402420043945,
"learning_rate": 0.00011935483870967743,
"loss": 0.7083,
"step": 200
},
{
"epoch": 1.61,
"eval_accuracy": 0.6575342465753424,
"eval_loss": 0.7924718260765076,
"eval_runtime": 7.9712,
"eval_samples_per_second": 27.474,
"eval_steps_per_second": 3.513,
"step": 200
},
{
"epoch": 1.69,
"grad_norm": 3.762314796447754,
"learning_rate": 0.00011532258064516131,
"loss": 0.6794,
"step": 210
},
{
"epoch": 1.77,
"grad_norm": 1.8527878522872925,
"learning_rate": 0.00011129032258064515,
"loss": 0.5424,
"step": 220
},
{
"epoch": 1.85,
"grad_norm": 2.1447601318359375,
"learning_rate": 0.00010725806451612903,
"loss": 0.7186,
"step": 230
},
{
"epoch": 1.94,
"grad_norm": 2.2273943424224854,
"learning_rate": 0.0001032258064516129,
"loss": 0.7786,
"step": 240
},
{
"epoch": 2.02,
"grad_norm": 1.1381219625473022,
"learning_rate": 9.919354838709678e-05,
"loss": 0.5616,
"step": 250
},
{
"epoch": 2.1,
"grad_norm": 1.41716730594635,
"learning_rate": 9.516129032258065e-05,
"loss": 0.3615,
"step": 260
},
{
"epoch": 2.18,
"grad_norm": 1.193400263786316,
"learning_rate": 9.112903225806452e-05,
"loss": 0.3589,
"step": 270
},
{
"epoch": 2.26,
"grad_norm": 1.1476421356201172,
"learning_rate": 8.709677419354839e-05,
"loss": 0.302,
"step": 280
},
{
"epoch": 2.34,
"grad_norm": 2.02689790725708,
"learning_rate": 8.306451612903227e-05,
"loss": 0.4161,
"step": 290
},
{
"epoch": 2.42,
"grad_norm": 1.2040166854858398,
"learning_rate": 7.903225806451613e-05,
"loss": 0.2479,
"step": 300
},
{
"epoch": 2.42,
"eval_accuracy": 0.6940639269406392,
"eval_loss": 0.871184766292572,
"eval_runtime": 7.1874,
"eval_samples_per_second": 30.47,
"eval_steps_per_second": 3.896,
"step": 300
},
{
"epoch": 2.5,
"grad_norm": 4.465162754058838,
"learning_rate": 7.500000000000001e-05,
"loss": 0.2949,
"step": 310
},
{
"epoch": 2.58,
"grad_norm": 2.3733272552490234,
"learning_rate": 7.096774193548388e-05,
"loss": 0.2599,
"step": 320
},
{
"epoch": 2.66,
"grad_norm": 3.527358293533325,
"learning_rate": 6.693548387096774e-05,
"loss": 0.4337,
"step": 330
},
{
"epoch": 2.74,
"grad_norm": 2.8637776374816895,
"learning_rate": 6.290322580645161e-05,
"loss": 0.3137,
"step": 340
},
{
"epoch": 2.82,
"grad_norm": 4.198659896850586,
"learning_rate": 5.887096774193549e-05,
"loss": 0.3192,
"step": 350
},
{
"epoch": 2.9,
"grad_norm": 4.358771324157715,
"learning_rate": 5.4838709677419355e-05,
"loss": 0.3796,
"step": 360
},
{
"epoch": 2.98,
"grad_norm": 3.1412558555603027,
"learning_rate": 5.080645161290323e-05,
"loss": 0.2622,
"step": 370
},
{
"epoch": 3.06,
"grad_norm": 1.4319863319396973,
"learning_rate": 4.67741935483871e-05,
"loss": 0.2016,
"step": 380
},
{
"epoch": 3.15,
"grad_norm": 0.5633509755134583,
"learning_rate": 4.2741935483870973e-05,
"loss": 0.1276,
"step": 390
},
{
"epoch": 3.23,
"grad_norm": 0.31856054067611694,
"learning_rate": 3.870967741935484e-05,
"loss": 0.127,
"step": 400
},
{
"epoch": 3.23,
"eval_accuracy": 0.6940639269406392,
"eval_loss": 0.8440136313438416,
"eval_runtime": 7.6031,
"eval_samples_per_second": 28.804,
"eval_steps_per_second": 3.683,
"step": 400
},
{
"epoch": 3.31,
"grad_norm": 0.2085038274526596,
"learning_rate": 3.467741935483872e-05,
"loss": 0.1289,
"step": 410
},
{
"epoch": 3.39,
"grad_norm": 2.471668004989624,
"learning_rate": 3.0645161290322585e-05,
"loss": 0.1495,
"step": 420
},
{
"epoch": 3.47,
"grad_norm": 1.026688575744629,
"learning_rate": 2.661290322580645e-05,
"loss": 0.1389,
"step": 430
},
{
"epoch": 3.55,
"grad_norm": 4.408120155334473,
"learning_rate": 2.258064516129032e-05,
"loss": 0.1799,
"step": 440
},
{
"epoch": 3.63,
"grad_norm": 1.7386049032211304,
"learning_rate": 1.8548387096774193e-05,
"loss": 0.0727,
"step": 450
},
{
"epoch": 3.71,
"grad_norm": 0.9766908884048462,
"learning_rate": 1.4516129032258066e-05,
"loss": 0.1062,
"step": 460
},
{
"epoch": 3.79,
"grad_norm": 0.2413896918296814,
"learning_rate": 1.0483870967741936e-05,
"loss": 0.1303,
"step": 470
},
{
"epoch": 3.87,
"grad_norm": 1.249718427658081,
"learning_rate": 6.451612903225806e-06,
"loss": 0.1025,
"step": 480
},
{
"epoch": 3.95,
"grad_norm": 3.0604677200317383,
"learning_rate": 2.4193548387096776e-06,
"loss": 0.1515,
"step": 490
},
{
"epoch": 4.0,
"step": 496,
"total_flos": 6.10974224738132e+17,
"train_loss": 0.575864625193419,
"train_runtime": 402.9871,
"train_samples_per_second": 19.564,
"train_steps_per_second": 1.231
}
],
"logging_steps": 10,
"max_steps": 496,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"total_flos": 6.10974224738132e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}