{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.03,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 2.7416666666666667e-08,
      "loss": 1.3507,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.658333333333333e-08,
      "loss": 1.3311,
      "step": 100
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.575e-08,
      "loss": 1.3252,
      "step": 150
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.1491666666666665e-07,
      "loss": 1.2374,
      "step": 200
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.4408333333333332e-07,
      "loss": 1.2842,
      "step": 250
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.7325e-07,
      "loss": 1.1666,
      "step": 300
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.0241666666666666e-07,
      "loss": 1.0935,
      "step": 350
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.315833333333333e-07,
      "loss": 1.0436,
      "step": 400
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.6075e-07,
      "loss": 1.0156,
      "step": 450
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.8991666666666667e-07,
      "loss": 1.1327,
      "step": 500
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.190833333333333e-07,
      "loss": 1.3554,
      "step": 550
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.4824999999999996e-07,
      "loss": 1.2085,
      "step": 600
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.3825e-07,
      "loss": 1.2987,
      "step": 650
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.2574999999999996e-07,
      "loss": 0.9905,
      "step": 700
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.1325e-07,
      "loss": 1.0011,
      "step": 750
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.0075e-07,
      "loss": 0.9573,
      "step": 800
    },
    {
      "epoch": 1.18,
      "learning_rate": 2.8825e-07,
      "loss": 0.9946,
      "step": 850
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.7574999999999996e-07,
      "loss": 0.9934,
      "step": 900
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.6325e-07,
      "loss": 0.9519,
      "step": 950
    },
    {
      "epoch": 2.02,
      "learning_rate": 2.5075e-07,
      "loss": 1.042,
      "step": 1000
    },
    {
      "epoch": 2.04,
      "learning_rate": 2.3824999999999998e-07,
      "loss": 1.0339,
      "step": 1050
    },
    {
      "epoch": 2.06,
      "learning_rate": 2.2575e-07,
      "loss": 1.0101,
      "step": 1100
    },
    {
      "epoch": 2.09,
      "learning_rate": 2.1325e-07,
      "loss": 1.0805,
      "step": 1150
    },
    {
      "epoch": 2.12,
      "learning_rate": 2.0075000000000002e-07,
      "loss": 0.8829,
      "step": 1200
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.8824999999999998e-07,
      "loss": 0.901,
      "step": 1250
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.7574999999999997e-07,
      "loss": 0.8773,
      "step": 1300
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.6324999999999998e-07,
      "loss": 0.921,
      "step": 1350
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.5075e-07,
      "loss": 0.8838,
      "step": 1400
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.3825e-07,
      "loss": 0.9622,
      "step": 1450
    },
    {
      "epoch": 3.02,
      "learning_rate": 1.2575e-07,
      "loss": 0.9518,
      "step": 1500
    },
    {
      "epoch": 3.05,
      "learning_rate": 1.1325e-07,
      "loss": 0.9739,
      "step": 1550
    },
    {
      "epoch": 3.07,
      "learning_rate": 1.0075e-07,
      "loss": 0.9918,
      "step": 1600
    },
    {
      "epoch": 3.1,
      "learning_rate": 8.825e-08,
      "loss": 0.9803,
      "step": 1650
    },
    {
      "epoch": 3.12,
      "learning_rate": 7.575e-08,
      "loss": 0.8518,
      "step": 1700
    },
    {
      "epoch": 3.15,
      "learning_rate": 6.325e-08,
      "loss": 0.8628,
      "step": 1750
    },
    {
      "epoch": 3.17,
      "learning_rate": 5.0749999999999995e-08,
      "loss": 0.864,
      "step": 1800
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.825e-08,
      "loss": 0.9134,
      "step": 1850
    },
    {
      "epoch": 3.22,
      "learning_rate": 2.5749999999999996e-08,
      "loss": 0.828,
      "step": 1900
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.3250000000000001e-08,
      "loss": 0.9324,
      "step": 1950
    },
    {
      "epoch": 4.03,
      "learning_rate": 7.5e-10,
      "loss": 0.6153,
      "step": 2000
    },
    {
      "epoch": 4.03,
      "step": 2000,
      "total_flos": 3.15121729536e+18,
      "train_loss": 1.0273077850341796,
      "train_runtime": 12431.9662,
      "train_samples_per_second": 10.296,
      "train_steps_per_second": 0.161
    }
  ],
  "max_steps": 2000,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 3.15121729536e+18,
  "trial_name": null,
  "trial_params": null
}