chansung's picture
Model save
f74a994 verified
raw
history blame
9.18 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9977827050997783,
"eval_steps": 500,
"global_step": 225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004434589800443459,
"grad_norm": 1.91265869140625,
"learning_rate": 4.347826086956522e-05,
"loss": 2.8127,
"step": 1
},
{
"epoch": 0.022172949002217297,
"grad_norm": 1.5314122438430786,
"learning_rate": 0.0002173913043478261,
"loss": 2.7241,
"step": 5
},
{
"epoch": 0.04434589800443459,
"grad_norm": 0.6431057453155518,
"learning_rate": 0.0004347826086956522,
"loss": 2.2423,
"step": 10
},
{
"epoch": 0.06651884700665188,
"grad_norm": 0.5257381200790405,
"learning_rate": 0.0006521739130434783,
"loss": 1.9505,
"step": 15
},
{
"epoch": 0.08869179600886919,
"grad_norm": 0.37703999876976013,
"learning_rate": 0.0008695652173913044,
"loss": 1.7881,
"step": 20
},
{
"epoch": 0.11086474501108648,
"grad_norm": 0.30256885290145874,
"learning_rate": 0.0009900990099009901,
"loss": 1.7031,
"step": 25
},
{
"epoch": 0.13303769401330376,
"grad_norm": 0.3443244993686676,
"learning_rate": 0.0009653465346534653,
"loss": 1.6352,
"step": 30
},
{
"epoch": 0.15521064301552107,
"grad_norm": 0.369827538728714,
"learning_rate": 0.0009405940594059406,
"loss": 1.5746,
"step": 35
},
{
"epoch": 0.17738359201773837,
"grad_norm": 0.231527641415596,
"learning_rate": 0.0009158415841584159,
"loss": 1.5409,
"step": 40
},
{
"epoch": 0.19955654101995565,
"grad_norm": 0.22827404737472534,
"learning_rate": 0.0008910891089108911,
"loss": 1.5187,
"step": 45
},
{
"epoch": 0.22172949002217296,
"grad_norm": 0.2396710067987442,
"learning_rate": 0.0008663366336633663,
"loss": 1.5128,
"step": 50
},
{
"epoch": 0.24390243902439024,
"grad_norm": 0.20095600187778473,
"learning_rate": 0.0008415841584158416,
"loss": 1.4848,
"step": 55
},
{
"epoch": 0.2660753880266075,
"grad_norm": 0.28900983929634094,
"learning_rate": 0.0008168316831683168,
"loss": 1.4962,
"step": 60
},
{
"epoch": 0.28824833702882485,
"grad_norm": 0.25716254115104675,
"learning_rate": 0.0007920792079207921,
"loss": 1.4789,
"step": 65
},
{
"epoch": 0.31042128603104213,
"grad_norm": 0.252340167760849,
"learning_rate": 0.0007673267326732674,
"loss": 1.458,
"step": 70
},
{
"epoch": 0.3325942350332594,
"grad_norm": 0.20464155077934265,
"learning_rate": 0.0007425742574257426,
"loss": 1.4558,
"step": 75
},
{
"epoch": 0.35476718403547675,
"grad_norm": 0.23394732177257538,
"learning_rate": 0.0007178217821782178,
"loss": 1.4562,
"step": 80
},
{
"epoch": 0.376940133037694,
"grad_norm": 0.2164139449596405,
"learning_rate": 0.000693069306930693,
"loss": 1.4338,
"step": 85
},
{
"epoch": 0.3991130820399113,
"grad_norm": 0.215862438082695,
"learning_rate": 0.0006683168316831684,
"loss": 1.4287,
"step": 90
},
{
"epoch": 0.4212860310421286,
"grad_norm": 0.20270515978336334,
"learning_rate": 0.0006435643564356436,
"loss": 1.4226,
"step": 95
},
{
"epoch": 0.4434589800443459,
"grad_norm": 0.20255711674690247,
"learning_rate": 0.0006188118811881188,
"loss": 1.4314,
"step": 100
},
{
"epoch": 0.4656319290465632,
"grad_norm": 0.20747065544128418,
"learning_rate": 0.000594059405940594,
"loss": 1.4194,
"step": 105
},
{
"epoch": 0.4878048780487805,
"grad_norm": 0.2104884535074234,
"learning_rate": 0.0005693069306930693,
"loss": 1.4106,
"step": 110
},
{
"epoch": 0.5099778270509978,
"grad_norm": 0.21514882147312164,
"learning_rate": 0.0005445544554455446,
"loss": 1.42,
"step": 115
},
{
"epoch": 0.532150776053215,
"grad_norm": 0.20466424524784088,
"learning_rate": 0.0005198019801980198,
"loss": 1.3937,
"step": 120
},
{
"epoch": 0.5543237250554324,
"grad_norm": 0.2181282341480255,
"learning_rate": 0.0004950495049504951,
"loss": 1.3972,
"step": 125
},
{
"epoch": 0.5764966740576497,
"grad_norm": 0.22615699470043182,
"learning_rate": 0.0004702970297029703,
"loss": 1.3882,
"step": 130
},
{
"epoch": 0.5986696230598669,
"grad_norm": 0.1967965066432953,
"learning_rate": 0.00044554455445544556,
"loss": 1.388,
"step": 135
},
{
"epoch": 0.6208425720620843,
"grad_norm": 0.2030034065246582,
"learning_rate": 0.0004207920792079208,
"loss": 1.4048,
"step": 140
},
{
"epoch": 0.6430155210643016,
"grad_norm": 0.2136310189962387,
"learning_rate": 0.00039603960396039607,
"loss": 1.3918,
"step": 145
},
{
"epoch": 0.6651884700665188,
"grad_norm": 0.22149060666561127,
"learning_rate": 0.0003712871287128713,
"loss": 1.4023,
"step": 150
},
{
"epoch": 0.6873614190687362,
"grad_norm": 0.2130667269229889,
"learning_rate": 0.0003465346534653465,
"loss": 1.3933,
"step": 155
},
{
"epoch": 0.7095343680709535,
"grad_norm": 0.19920696318149567,
"learning_rate": 0.0003217821782178218,
"loss": 1.3815,
"step": 160
},
{
"epoch": 0.7317073170731707,
"grad_norm": 0.20453611016273499,
"learning_rate": 0.000297029702970297,
"loss": 1.3648,
"step": 165
},
{
"epoch": 0.753880266075388,
"grad_norm": 0.21325863897800446,
"learning_rate": 0.0002722772277227723,
"loss": 1.3773,
"step": 170
},
{
"epoch": 0.7760532150776053,
"grad_norm": 0.2014823704957962,
"learning_rate": 0.00024752475247524753,
"loss": 1.3881,
"step": 175
},
{
"epoch": 0.7982261640798226,
"grad_norm": 0.20359407365322113,
"learning_rate": 0.00022277227722772278,
"loss": 1.3826,
"step": 180
},
{
"epoch": 0.8203991130820399,
"grad_norm": 0.21738748252391815,
"learning_rate": 0.00019801980198019803,
"loss": 1.3705,
"step": 185
},
{
"epoch": 0.8425720620842572,
"grad_norm": 0.1990172564983368,
"learning_rate": 0.00017326732673267326,
"loss": 1.3693,
"step": 190
},
{
"epoch": 0.8647450110864745,
"grad_norm": 0.2007543295621872,
"learning_rate": 0.0001485148514851485,
"loss": 1.3575,
"step": 195
},
{
"epoch": 0.8869179600886918,
"grad_norm": 0.5149243474006653,
"learning_rate": 0.00012376237623762376,
"loss": 1.374,
"step": 200
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.2131042778491974,
"learning_rate": 9.900990099009902e-05,
"loss": 1.3636,
"step": 205
},
{
"epoch": 0.9312638580931264,
"grad_norm": 0.19097404181957245,
"learning_rate": 7.425742574257426e-05,
"loss": 1.3489,
"step": 210
},
{
"epoch": 0.9534368070953437,
"grad_norm": 0.19905418157577515,
"learning_rate": 4.950495049504951e-05,
"loss": 1.3442,
"step": 215
},
{
"epoch": 0.975609756097561,
"grad_norm": 0.19617854058742523,
"learning_rate": 2.4752475247524754e-05,
"loss": 1.3721,
"step": 220
},
{
"epoch": 0.9977827050997783,
"grad_norm": 0.20064575970172882,
"learning_rate": 0.0,
"loss": 1.3767,
"step": 225
},
{
"epoch": 0.9977827050997783,
"eval_loss": 1.7732421159744263,
"eval_runtime": 0.5415,
"eval_samples_per_second": 1.847,
"eval_steps_per_second": 1.847,
"step": 225
},
{
"epoch": 0.9977827050997783,
"step": 225,
"total_flos": 3.3259687694984806e+17,
"train_loss": 1.4963340536753336,
"train_runtime": 725.2803,
"train_samples_per_second": 9.934,
"train_steps_per_second": 0.31
}
],
"logging_steps": 5,
"max_steps": 225,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.3259687694984806e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}