{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9977827050997783,
  "eval_steps": 500,
  "global_step": 225,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004434589800443459,
      "grad_norm": 1.91265869140625,
      "learning_rate": 4.347826086956522e-05,
      "loss": 2.8127,
      "step": 1
    },
    {
      "epoch": 0.022172949002217297,
      "grad_norm": 1.5314122438430786,
      "learning_rate": 0.0002173913043478261,
      "loss": 2.7241,
      "step": 5
    },
    {
      "epoch": 0.04434589800443459,
      "grad_norm": 0.6431057453155518,
      "learning_rate": 0.0004347826086956522,
      "loss": 2.2423,
      "step": 10
    },
    {
      "epoch": 0.06651884700665188,
      "grad_norm": 0.5257381200790405,
      "learning_rate": 0.0006521739130434783,
      "loss": 1.9505,
      "step": 15
    },
    {
      "epoch": 0.08869179600886919,
      "grad_norm": 0.37703999876976013,
      "learning_rate": 0.0008695652173913044,
      "loss": 1.7881,
      "step": 20
    },
    {
      "epoch": 0.11086474501108648,
      "grad_norm": 0.30256885290145874,
      "learning_rate": 0.0009900990099009901,
      "loss": 1.7031,
      "step": 25
    },
    {
      "epoch": 0.13303769401330376,
      "grad_norm": 0.3443244993686676,
      "learning_rate": 0.0009653465346534653,
      "loss": 1.6352,
      "step": 30
    },
    {
      "epoch": 0.15521064301552107,
      "grad_norm": 0.369827538728714,
      "learning_rate": 0.0009405940594059406,
      "loss": 1.5746,
      "step": 35
    },
    {
      "epoch": 0.17738359201773837,
      "grad_norm": 0.231527641415596,
      "learning_rate": 0.0009158415841584159,
      "loss": 1.5409,
      "step": 40
    },
    {
      "epoch": 0.19955654101995565,
      "grad_norm": 0.22827404737472534,
      "learning_rate": 0.0008910891089108911,
      "loss": 1.5187,
      "step": 45
    },
    {
      "epoch": 0.22172949002217296,
      "grad_norm": 0.2396710067987442,
      "learning_rate": 0.0008663366336633663,
      "loss": 1.5128,
      "step": 50
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.20095600187778473,
      "learning_rate": 0.0008415841584158416,
      "loss": 1.4848,
      "step": 55
    },
    {
      "epoch": 0.2660753880266075,
      "grad_norm": 0.28900983929634094,
      "learning_rate": 0.0008168316831683168,
      "loss": 1.4962,
      "step": 60
    },
    {
      "epoch": 0.28824833702882485,
      "grad_norm": 0.25716254115104675,
      "learning_rate": 0.0007920792079207921,
      "loss": 1.4789,
      "step": 65
    },
    {
      "epoch": 0.31042128603104213,
      "grad_norm": 0.252340167760849,
      "learning_rate": 0.0007673267326732674,
      "loss": 1.458,
      "step": 70
    },
    {
      "epoch": 0.3325942350332594,
      "grad_norm": 0.20464155077934265,
      "learning_rate": 0.0007425742574257426,
      "loss": 1.4558,
      "step": 75
    },
    {
      "epoch": 0.35476718403547675,
      "grad_norm": 0.23394732177257538,
      "learning_rate": 0.0007178217821782178,
      "loss": 1.4562,
      "step": 80
    },
    {
      "epoch": 0.376940133037694,
      "grad_norm": 0.2164139449596405,
      "learning_rate": 0.000693069306930693,
      "loss": 1.4338,
      "step": 85
    },
    {
      "epoch": 0.3991130820399113,
      "grad_norm": 0.215862438082695,
      "learning_rate": 0.0006683168316831684,
      "loss": 1.4287,
      "step": 90
    },
    {
      "epoch": 0.4212860310421286,
      "grad_norm": 0.20270515978336334,
      "learning_rate": 0.0006435643564356436,
      "loss": 1.4226,
      "step": 95
    },
    {
      "epoch": 0.4434589800443459,
      "grad_norm": 0.20255711674690247,
      "learning_rate": 0.0006188118811881188,
      "loss": 1.4314,
      "step": 100
    },
    {
      "epoch": 0.4656319290465632,
      "grad_norm": 0.20747065544128418,
      "learning_rate": 0.000594059405940594,
      "loss": 1.4194,
      "step": 105
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.2104884535074234,
      "learning_rate": 0.0005693069306930693,
      "loss": 1.4106,
      "step": 110
    },
    {
      "epoch": 0.5099778270509978,
      "grad_norm": 0.21514882147312164,
      "learning_rate": 0.0005445544554455446,
      "loss": 1.42,
      "step": 115
    },
    {
      "epoch": 0.532150776053215,
      "grad_norm": 0.20466424524784088,
      "learning_rate": 0.0005198019801980198,
      "loss": 1.3937,
      "step": 120
    },
    {
      "epoch": 0.5543237250554324,
      "grad_norm": 0.2181282341480255,
      "learning_rate": 0.0004950495049504951,
      "loss": 1.3972,
      "step": 125
    },
    {
      "epoch": 0.5764966740576497,
      "grad_norm": 0.22615699470043182,
      "learning_rate": 0.0004702970297029703,
      "loss": 1.3882,
      "step": 130
    },
    {
      "epoch": 0.5986696230598669,
      "grad_norm": 0.1967965066432953,
      "learning_rate": 0.00044554455445544556,
      "loss": 1.388,
      "step": 135
    },
    {
      "epoch": 0.6208425720620843,
      "grad_norm": 0.2030034065246582,
      "learning_rate": 0.0004207920792079208,
      "loss": 1.4048,
      "step": 140
    },
    {
      "epoch": 0.6430155210643016,
      "grad_norm": 0.2136310189962387,
      "learning_rate": 0.00039603960396039607,
      "loss": 1.3918,
      "step": 145
    },
    {
      "epoch": 0.6651884700665188,
      "grad_norm": 0.22149060666561127,
      "learning_rate": 0.0003712871287128713,
      "loss": 1.4023,
      "step": 150
    },
    {
      "epoch": 0.6873614190687362,
      "grad_norm": 0.2130667269229889,
      "learning_rate": 0.0003465346534653465,
      "loss": 1.3933,
      "step": 155
    },
    {
      "epoch": 0.7095343680709535,
      "grad_norm": 0.19920696318149567,
      "learning_rate": 0.0003217821782178218,
      "loss": 1.3815,
      "step": 160
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 0.20453611016273499,
      "learning_rate": 0.000297029702970297,
      "loss": 1.3648,
      "step": 165
    },
    {
      "epoch": 0.753880266075388,
      "grad_norm": 0.21325863897800446,
      "learning_rate": 0.0002722772277227723,
      "loss": 1.3773,
      "step": 170
    },
    {
      "epoch": 0.7760532150776053,
      "grad_norm": 0.2014823704957962,
      "learning_rate": 0.00024752475247524753,
      "loss": 1.3881,
      "step": 175
    },
    {
      "epoch": 0.7982261640798226,
      "grad_norm": 0.20359407365322113,
      "learning_rate": 0.00022277227722772278,
      "loss": 1.3826,
      "step": 180
    },
    {
      "epoch": 0.8203991130820399,
      "grad_norm": 0.21738748252391815,
      "learning_rate": 0.00019801980198019803,
      "loss": 1.3705,
      "step": 185
    },
    {
      "epoch": 0.8425720620842572,
      "grad_norm": 0.1990172564983368,
      "learning_rate": 0.00017326732673267326,
      "loss": 1.3693,
      "step": 190
    },
    {
      "epoch": 0.8647450110864745,
      "grad_norm": 0.2007543295621872,
      "learning_rate": 0.0001485148514851485,
      "loss": 1.3575,
      "step": 195
    },
    {
      "epoch": 0.8869179600886918,
      "grad_norm": 0.5149243474006653,
      "learning_rate": 0.00012376237623762376,
      "loss": 1.374,
      "step": 200
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.2131042778491974,
      "learning_rate": 9.900990099009902e-05,
      "loss": 1.3636,
      "step": 205
    },
    {
      "epoch": 0.9312638580931264,
      "grad_norm": 0.19097404181957245,
      "learning_rate": 7.425742574257426e-05,
      "loss": 1.3489,
      "step": 210
    },
    {
      "epoch": 0.9534368070953437,
      "grad_norm": 0.19905418157577515,
      "learning_rate": 4.950495049504951e-05,
      "loss": 1.3442,
      "step": 215
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.19617854058742523,
      "learning_rate": 2.4752475247524754e-05,
      "loss": 1.3721,
      "step": 220
    },
    {
      "epoch": 0.9977827050997783,
      "grad_norm": 0.20064575970172882,
      "learning_rate": 0.0,
      "loss": 1.3767,
      "step": 225
    },
    {
      "epoch": 0.9977827050997783,
      "eval_loss": 1.7732421159744263,
      "eval_runtime": 0.5415,
      "eval_samples_per_second": 1.847,
      "eval_steps_per_second": 1.847,
      "step": 225
    },
    {
      "epoch": 0.9977827050997783,
      "step": 225,
      "total_flos": 3.3259687694984806e+17,
      "train_loss": 1.4963340536753336,
      "train_runtime": 725.2803,
      "train_samples_per_second": 9.934,
      "train_steps_per_second": 0.31
    }
  ],
  "logging_steps": 5,
  "max_steps": 225,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.3259687694984806e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}