roberta-base-indonesia / trainer_state.json
akahana's picture
train with TPUv2
4a0ba50
raw
history blame
8.16 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.968063872255488,
"global_step": 32000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.25,
"learning_rate": 9.916833000665336e-05,
"loss": 7.7087,
"step": 500
},
{
"epoch": 0.5,
"learning_rate": 9.833666001330672e-05,
"loss": 7.4465,
"step": 1000
},
{
"epoch": 0.75,
"learning_rate": 9.750499001996009e-05,
"loss": 7.1976,
"step": 1500
},
{
"epoch": 1.0,
"learning_rate": 9.667332002661345e-05,
"loss": 6.9445,
"step": 2000
},
{
"epoch": 1.25,
"learning_rate": 9.58416500332668e-05,
"loss": 6.6812,
"step": 2500
},
{
"epoch": 1.5,
"learning_rate": 9.500998003992016e-05,
"loss": 6.2449,
"step": 3000
},
{
"epoch": 1.75,
"learning_rate": 9.417831004657353e-05,
"loss": 5.6326,
"step": 3500
},
{
"epoch": 2.0,
"learning_rate": 9.334664005322689e-05,
"loss": 4.9326,
"step": 4000
},
{
"epoch": 2.25,
"learning_rate": 9.251497005988024e-05,
"loss": 4.4854,
"step": 4500
},
{
"epoch": 2.5,
"learning_rate": 9.16833000665336e-05,
"loss": 4.1808,
"step": 5000
},
{
"epoch": 2.74,
"learning_rate": 9.085163007318697e-05,
"loss": 3.9169,
"step": 5500
},
{
"epoch": 2.99,
"learning_rate": 9.001996007984033e-05,
"loss": 3.7161,
"step": 6000
},
{
"epoch": 3.24,
"learning_rate": 8.918829008649369e-05,
"loss": 3.5102,
"step": 6500
},
{
"epoch": 3.49,
"learning_rate": 8.835662009314704e-05,
"loss": 3.3782,
"step": 7000
},
{
"epoch": 3.74,
"learning_rate": 8.752495009980041e-05,
"loss": 3.246,
"step": 7500
},
{
"epoch": 3.99,
"learning_rate": 8.669328010645377e-05,
"loss": 3.1532,
"step": 8000
},
{
"epoch": 4.24,
"learning_rate": 8.586161011310713e-05,
"loss": 3.0313,
"step": 8500
},
{
"epoch": 4.49,
"learning_rate": 8.502994011976048e-05,
"loss": 2.9563,
"step": 9000
},
{
"epoch": 4.74,
"learning_rate": 8.419827012641384e-05,
"loss": 2.8741,
"step": 9500
},
{
"epoch": 4.99,
"learning_rate": 8.336660013306721e-05,
"loss": 2.816,
"step": 10000
},
{
"epoch": 5.24,
"learning_rate": 8.253493013972057e-05,
"loss": 2.7435,
"step": 10500
},
{
"epoch": 5.49,
"learning_rate": 8.170326014637393e-05,
"loss": 2.6801,
"step": 11000
},
{
"epoch": 5.74,
"learning_rate": 8.087159015302728e-05,
"loss": 2.6325,
"step": 11500
},
{
"epoch": 5.99,
"learning_rate": 8.003992015968065e-05,
"loss": 2.5931,
"step": 12000
},
{
"epoch": 6.24,
"learning_rate": 7.9208250166334e-05,
"loss": 2.525,
"step": 12500
},
{
"epoch": 6.49,
"learning_rate": 7.837658017298735e-05,
"loss": 2.4914,
"step": 13000
},
{
"epoch": 6.74,
"learning_rate": 7.754491017964072e-05,
"loss": 2.4559,
"step": 13500
},
{
"epoch": 6.99,
"learning_rate": 7.671324018629408e-05,
"loss": 2.4176,
"step": 14000
},
{
"epoch": 7.24,
"learning_rate": 7.588157019294744e-05,
"loss": 2.3771,
"step": 14500
},
{
"epoch": 7.49,
"learning_rate": 7.50499001996008e-05,
"loss": 2.3386,
"step": 15000
},
{
"epoch": 7.73,
"learning_rate": 7.421823020625415e-05,
"loss": 2.3137,
"step": 15500
},
{
"epoch": 7.98,
"learning_rate": 7.338656021290752e-05,
"loss": 2.2792,
"step": 16000
},
{
"epoch": 8.23,
"learning_rate": 7.255489021956088e-05,
"loss": 2.235,
"step": 16500
},
{
"epoch": 8.48,
"learning_rate": 7.172322022621424e-05,
"loss": 2.2126,
"step": 17000
},
{
"epoch": 8.73,
"learning_rate": 7.089155023286759e-05,
"loss": 2.1985,
"step": 17500
},
{
"epoch": 8.98,
"learning_rate": 7.005988023952096e-05,
"loss": 2.1803,
"step": 18000
},
{
"epoch": 9.23,
"learning_rate": 6.922821024617432e-05,
"loss": 2.1363,
"step": 18500
},
{
"epoch": 9.48,
"learning_rate": 6.839654025282768e-05,
"loss": 2.1104,
"step": 19000
},
{
"epoch": 9.73,
"learning_rate": 6.756487025948103e-05,
"loss": 2.0954,
"step": 19500
},
{
"epoch": 9.98,
"learning_rate": 6.67332002661344e-05,
"loss": 2.0818,
"step": 20000
},
{
"epoch": 10.23,
"learning_rate": 6.590153027278776e-05,
"loss": 2.0576,
"step": 20500
},
{
"epoch": 10.48,
"learning_rate": 6.506986027944112e-05,
"loss": 2.0304,
"step": 21000
},
{
"epoch": 10.73,
"learning_rate": 6.423819028609448e-05,
"loss": 2.0079,
"step": 21500
},
{
"epoch": 10.98,
"learning_rate": 6.340652029274785e-05,
"loss": 2.0089,
"step": 22000
},
{
"epoch": 11.23,
"learning_rate": 6.25748502994012e-05,
"loss": 1.971,
"step": 22500
},
{
"epoch": 11.48,
"learning_rate": 6.174318030605456e-05,
"loss": 1.9458,
"step": 23000
},
{
"epoch": 11.73,
"learning_rate": 6.091151031270792e-05,
"loss": 1.9479,
"step": 23500
},
{
"epoch": 11.98,
"learning_rate": 6.007984031936128e-05,
"loss": 1.9332,
"step": 24000
},
{
"epoch": 12.23,
"learning_rate": 5.924817032601464e-05,
"loss": 1.9048,
"step": 24500
},
{
"epoch": 12.48,
"learning_rate": 5.8416500332668e-05,
"loss": 1.8943,
"step": 25000
},
{
"epoch": 12.72,
"learning_rate": 5.758483033932136e-05,
"loss": 1.8837,
"step": 25500
},
{
"epoch": 12.97,
"learning_rate": 5.675316034597472e-05,
"loss": 1.8606,
"step": 26000
},
{
"epoch": 13.22,
"learning_rate": 5.592149035262808e-05,
"loss": 1.8289,
"step": 26500
},
{
"epoch": 13.47,
"learning_rate": 5.508982035928144e-05,
"loss": 1.8209,
"step": 27000
},
{
"epoch": 13.72,
"learning_rate": 5.42581503659348e-05,
"loss": 1.8284,
"step": 27500
},
{
"epoch": 13.97,
"learning_rate": 5.342648037258816e-05,
"loss": 1.814,
"step": 28000
},
{
"epoch": 14.22,
"learning_rate": 5.259481037924152e-05,
"loss": 1.7909,
"step": 28500
},
{
"epoch": 14.47,
"learning_rate": 5.1763140385894884e-05,
"loss": 1.7712,
"step": 29000
},
{
"epoch": 14.72,
"learning_rate": 5.093147039254824e-05,
"loss": 1.7695,
"step": 29500
},
{
"epoch": 14.97,
"learning_rate": 5.0099800399201604e-05,
"loss": 1.7594,
"step": 30000
},
{
"epoch": 15.22,
"learning_rate": 4.9268130405854955e-05,
"loss": 1.7413,
"step": 30500
},
{
"epoch": 15.47,
"learning_rate": 4.843646041250832e-05,
"loss": 1.7254,
"step": 31000
},
{
"epoch": 15.72,
"learning_rate": 4.7604790419161675e-05,
"loss": 1.7287,
"step": 31500
},
{
"epoch": 15.97,
"learning_rate": 4.677312042581504e-05,
"loss": 1.7078,
"step": 32000
}
],
"max_steps": 60120,
"num_train_epochs": 30,
"total_flos": 3.3697979301888e+16,
"trial_name": null,
"trial_params": null
}