{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.032520325203252,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 0.7537718524378184,
      "learning_rate": 4.998825837977733e-05,
      "loss": 1.0335,
      "step": 25
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5115893351462029,
      "learning_rate": 4.9951068336359185e-05,
      "loss": 0.9543,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3655365544393326,
      "learning_rate": 4.9888447388643216e-05,
      "loss": 0.889,
      "step": 75
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3802482724658219,
      "learning_rate": 4.980045936184552e-05,
      "loss": 0.8824,
      "step": 100
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.46128857579583404,
      "learning_rate": 4.968719393609757e-05,
      "loss": 0.8812,
      "step": 125
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.4675840689300933,
      "learning_rate": 4.954876655504144e-05,
      "loss": 0.8626,
      "step": 150
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.5174033092078555,
      "learning_rate": 4.938531830816607e-05,
      "loss": 0.8542,
      "step": 175
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.47966453174679635,
      "learning_rate": 4.919701578700444e-05,
      "loss": 0.8615,
      "step": 200
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.5800019356792034,
      "learning_rate": 4.898405091533834e-05,
      "loss": 0.8198,
      "step": 225
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.5068384935929343,
      "learning_rate": 4.874664075358366e-05,
      "loss": 0.835,
      "step": 250
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.5665554500957887,
      "learning_rate": 4.84850272775557e-05,
      "loss": 0.833,
      "step": 275
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.6225574393610873,
      "learning_rate": 4.8199477131839854e-05,
      "loss": 0.8362,
      "step": 300
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.5883987854013639,
      "learning_rate": 4.789028135801918e-05,
      "loss": 0.8315,
      "step": 325
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.6212622090526995,
      "learning_rate": 4.7557755098035814e-05,
      "loss": 0.8082,
      "step": 350
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.6254380356435723,
      "learning_rate": 4.720223727298845e-05,
      "loss": 0.8112,
      "step": 375
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.7114667768707209,
      "learning_rate": 4.682409023769342e-05,
      "loss": 0.8141,
      "step": 400
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.7156140969579615,
      "learning_rate": 4.6423699411361474e-05,
      "loss": 0.8214,
      "step": 425
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.6560300477797654,
      "learning_rate": 4.600147288476647e-05,
      "loss": 0.819,
      "step": 450
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.6220749749772762,
      "learning_rate": 4.5557841004306625e-05,
      "loss": 0.8177,
      "step": 475
    },
    {
      "epoch": 2.03,
      "grad_norm": 0.7459915153227248,
      "learning_rate": 4.509325593338203e-05,
      "loss": 0.8207,
      "step": 500
    }
  ],
  "logging_steps": 25,
  "max_steps": 2460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 152390335463424.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}