{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9966329966329966,
  "eval_steps": 500,
  "global_step": 148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006734006734006734,
      "grad_norm": 74.241455078125,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.5135,
      "step": 1
    },
    {
      "epoch": 0.03367003367003367,
      "grad_norm": 3.1438040733337402,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.4726,
      "step": 5
    },
    {
      "epoch": 0.06734006734006734,
      "grad_norm": 1.2921487092971802,
      "learning_rate": 0.00013333333333333334,
      "loss": 2.3003,
      "step": 10
    },
    {
      "epoch": 0.10101010101010101,
      "grad_norm": 1.0113569498062134,
      "learning_rate": 0.0002,
      "loss": 2.1321,
      "step": 15
    },
    {
      "epoch": 0.13468013468013468,
      "grad_norm": 2.183227777481079,
      "learning_rate": 0.00019930337092856243,
      "loss": 1.9945,
      "step": 20
    },
    {
      "epoch": 0.16835016835016836,
      "grad_norm": 0.47641709446907043,
      "learning_rate": 0.00019722318955551306,
      "loss": 1.8885,
      "step": 25
    },
    {
      "epoch": 0.20202020202020202,
      "grad_norm": 0.7186397910118103,
      "learning_rate": 0.00019378843817721854,
      "loss": 1.8266,
      "step": 30
    },
    {
      "epoch": 0.2356902356902357,
      "grad_norm": 0.37949636578559875,
      "learning_rate": 0.00018904697174694447,
      "loss": 1.7831,
      "step": 35
    },
    {
      "epoch": 0.26936026936026936,
      "grad_norm": 0.5123534202575684,
      "learning_rate": 0.0001830648511318223,
      "loss": 1.7567,
      "step": 40
    },
    {
      "epoch": 0.30303030303030304,
      "grad_norm": 0.5447937250137329,
      "learning_rate": 0.00017592542271443887,
      "loss": 1.7298,
      "step": 45
    },
    {
      "epoch": 0.3367003367003367,
      "grad_norm": 0.202178493142128,
      "learning_rate": 0.00016772815716257412,
      "loss": 1.7114,
      "step": 50
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.3912014663219452,
      "learning_rate": 0.00015858726354602248,
      "loss": 1.7067,
      "step": 55
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 0.2787935137748718,
      "learning_rate": 0.00014863009810942815,
      "loss": 1.6797,
      "step": 60
    },
    {
      "epoch": 0.4377104377104377,
      "grad_norm": 0.1719539761543274,
      "learning_rate": 0.000137995389871036,
      "loss": 1.6721,
      "step": 65
    },
    {
      "epoch": 0.4713804713804714,
      "grad_norm": 0.2619878053665161,
      "learning_rate": 0.0001268313077693485,
      "loss": 1.6664,
      "step": 70
    },
    {
      "epoch": 0.5050505050505051,
      "grad_norm": 0.268819659948349,
      "learning_rate": 0.0001152933962873246,
      "loss": 1.6685,
      "step": 75
    },
    {
      "epoch": 0.5387205387205387,
      "grad_norm": 0.1786731630563736,
      "learning_rate": 0.00010354240831620541,
      "loss": 1.6523,
      "step": 80
    },
    {
      "epoch": 0.5723905723905723,
      "grad_norm": 0.1858537644147873,
      "learning_rate": 9.174206545276677e-05,
      "loss": 1.6465,
      "step": 85
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 0.25154948234558105,
      "learning_rate": 8.005677693484077e-05,
      "loss": 1.6344,
      "step": 90
    },
    {
      "epoch": 0.6397306397306397,
      "grad_norm": 0.1697424203157425,
      "learning_rate": 6.864934899622191e-05,
      "loss": 1.6472,
      "step": 95
    },
    {
      "epoch": 0.6734006734006734,
      "grad_norm": 0.1832387000322342,
      "learning_rate": 5.767871655555751e-05,
      "loss": 1.6354,
      "step": 100
    },
    {
      "epoch": 0.7070707070707071,
      "grad_norm": 0.1768219918012619,
      "learning_rate": 4.729772884265212e-05,
      "loss": 1.6274,
      "step": 105
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.17322488129138947,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 1.6235,
      "step": 110
    },
    {
      "epoch": 0.7744107744107744,
      "grad_norm": 0.1708805114030838,
      "learning_rate": 2.8872993029040508e-05,
      "loss": 1.6349,
      "step": 115
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 0.1731610745191574,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 1.6322,
      "step": 120
    },
    {
      "epoch": 0.8417508417508418,
      "grad_norm": 0.1952390819787979,
      "learning_rate": 1.439838153227e-05,
      "loss": 1.6391,
      "step": 125
    },
    {
      "epoch": 0.8754208754208754,
      "grad_norm": 0.1885603815317154,
      "learning_rate": 8.903465523913957e-06,
      "loss": 1.6368,
      "step": 130
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.19654668867588043,
      "learning_rate": 4.6777594000230855e-06,
      "loss": 1.6307,
      "step": 135
    },
    {
      "epoch": 0.9427609427609428,
      "grad_norm": 0.17118915915489197,
      "learning_rate": 1.7801381552624563e-06,
      "loss": 1.6287,
      "step": 140
    },
    {
      "epoch": 0.9764309764309764,
      "grad_norm": 0.17712068557739258,
      "learning_rate": 2.509731335744281e-07,
      "loss": 1.632,
      "step": 145
    },
    {
      "epoch": 0.9966329966329966,
      "eval_loss": 2.3924484252929688,
      "eval_runtime": 0.6217,
      "eval_samples_per_second": 33.78,
      "eval_steps_per_second": 1.609,
      "step": 148
    },
    {
      "epoch": 0.9966329966329966,
      "step": 148,
      "total_flos": 6.55432743055786e+17,
      "train_loss": 1.752148318935085,
      "train_runtime": 587.2689,
      "train_samples_per_second": 48.443,
      "train_steps_per_second": 0.252
    }
  ],
  "logging_steps": 5,
  "max_steps": 148,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.55432743055786e+17,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": null
}