File size: 2,373 Bytes
48de83f 9a012c2 48de83f 9a012c2 4b3e94a 9a012c2 4b3e94a 48de83f 9a012c2 4b3e94a 9a012c2 4b3e94a 48de83f 9a012c2 4b3e94a 9a012c2 4b3e94a 48de83f 9a012c2 4b3e94a 9a012c2 4b3e94a 48de83f 9a012c2 4b3e94a 9a012c2 4b3e94a 48de83f 9a012c2 4b3e94a 9a012c2 4b3e94a 48de83f 6303895 9a012c2 4b3e94a 9a012c2 4b3e94a 6303895 9a012c2 4b3e94a 9a012c2 4b3e94a 6303895 48de83f 9a012c2 4b3e94a 48de83f 9a012c2 48de83f 9a012c2 48de83f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 100,
"global_step": 84,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.47619047619047616,
"grad_norm": 2.757856845855713,
"learning_rate": 0.0001761904761904762,
"loss": 0.5843,
"step": 10
},
{
"epoch": 0.9523809523809523,
"grad_norm": 3.4689693450927734,
"learning_rate": 0.00015238095238095237,
"loss": 0.5473,
"step": 20
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.5666776299476624,
"learning_rate": 0.00012857142857142858,
"loss": 0.1833,
"step": 30
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.2773081362247467,
"learning_rate": 0.00010476190476190477,
"loss": 0.1073,
"step": 40
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.14523948729038239,
"learning_rate": 8.095238095238096e-05,
"loss": 0.0217,
"step": 50
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.10562175512313843,
"learning_rate": 5.714285714285714e-05,
"loss": 0.0158,
"step": 60
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.09110253304243088,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0125,
"step": 70
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.09965213388204575,
"learning_rate": 9.523809523809523e-06,
"loss": 0.0116,
"step": 80
},
{
"epoch": 4.0,
"step": 84,
"total_flos": 2.5417372593586176e+16,
"train_loss": 0.17717325119745164,
"train_runtime": 30.5569,
"train_samples_per_second": 10.734,
"train_steps_per_second": 2.749
}
],
"logging_steps": 10,
"max_steps": 84,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.5417372593586176e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|