{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1678190239645566,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006712760958582265,
      "grad_norm": 1.0528477430343628,
      "learning_rate": 0.00014985229447149842,
      "loss": 7.6572,
      "step": 100
    },
    {
      "epoch": 0.01342552191716453,
      "grad_norm": 1.1515228748321533,
      "learning_rate": 0.00014940916946874937,
      "loss": 6.1239,
      "step": 200
    },
    {
      "epoch": 0.020138282875746795,
      "grad_norm": 1.2438573837280273,
      "learning_rate": 0.00014867237372557577,
      "loss": 5.4067,
      "step": 300
    },
    {
      "epoch": 0.02685104383432906,
      "grad_norm": 1.3082321882247925,
      "learning_rate": 0.00014764481515444297,
      "loss": 5.0934,
      "step": 400
    },
    {
      "epoch": 0.03356380479291132,
      "grad_norm": 1.4851253032684326,
      "learning_rate": 0.00014633054922174807,
      "loss": 4.8669,
      "step": 500
    },
    {
      "epoch": 0.04027656575149359,
      "grad_norm": 1.3247835636138916,
      "learning_rate": 0.00014473476294210664,
      "loss": 4.7151,
      "step": 600
    },
    {
      "epoch": 0.04698932671007586,
      "grad_norm": 1.5466852188110352,
      "learning_rate": 0.0001428637544067573,
      "loss": 4.5684,
      "step": 700
    },
    {
      "epoch": 0.05370208766865812,
      "grad_norm": 1.3418868780136108,
      "learning_rate": 0.0001407249079268789,
      "loss": 4.4861,
      "step": 800
    },
    {
      "epoch": 0.060414848627240385,
      "grad_norm": 1.4495049715042114,
      "learning_rate": 0.0001383266648899225,
      "loss": 4.3896,
      "step": 900
    },
    {
      "epoch": 0.06712760958582265,
      "grad_norm": 1.2629677057266235,
      "learning_rate": 0.0001356784904439796,
      "loss": 4.3076,
      "step": 1000
    },
    {
      "epoch": 0.07384037054440491,
      "grad_norm": 1.382216215133667,
      "learning_rate": 0.00013279083614167278,
      "loss": 4.2179,
      "step": 1100
    },
    {
      "epoch": 0.08055313150298718,
      "grad_norm": 1.2883789539337158,
      "learning_rate": 0.00012967509869100336,
      "loss": 4.1599,
      "step": 1200
    },
    {
      "epoch": 0.08726589246156945,
      "grad_norm": 1.3527660369873047,
      "learning_rate": 0.00012634357497595263,
      "loss": 4.0976,
      "step": 1300
    },
    {
      "epoch": 0.09397865342015171,
      "grad_norm": 1.3394412994384766,
      "learning_rate": 0.00012280941352435837,
      "loss": 4.0805,
      "step": 1400
    },
    {
      "epoch": 0.10069141437873397,
      "grad_norm": 1.4646199941635132,
      "learning_rate": 0.00011908656261460721,
      "loss": 4.0032,
      "step": 1500
    },
    {
      "epoch": 0.10740417533731624,
      "grad_norm": 1.2548878192901611,
      "learning_rate": 0.00011518971522595105,
      "loss": 3.9702,
      "step": 1600
    },
    {
      "epoch": 0.1141169362958985,
      "grad_norm": 1.363207221031189,
      "learning_rate": 0.00011113425104971176,
      "loss": 3.9321,
      "step": 1700
    },
    {
      "epoch": 0.12082969725448077,
      "grad_norm": 1.3911628723144531,
      "learning_rate": 0.00010693617579023885,
      "loss": 3.8974,
      "step": 1800
    },
    {
      "epoch": 0.12754245821306304,
      "grad_norm": 1.3630716800689697,
      "learning_rate": 0.00010261205799518043,
      "loss": 3.8514,
      "step": 1900
    },
    {
      "epoch": 0.1342552191716453,
      "grad_norm": 1.2687169313430786,
      "learning_rate": 9.817896366438074e-05,
      "loss": 3.818,
      "step": 2000
    },
    {
      "epoch": 0.14096798013022757,
      "grad_norm": 1.3437057733535767,
      "learning_rate": 9.36543888954819e-05,
      "loss": 3.8071,
      "step": 2100
    },
    {
      "epoch": 0.14768074108880982,
      "grad_norm": 1.3673392534255981,
      "learning_rate": 8.905619083205881e-05,
      "loss": 3.7842,
      "step": 2200
    },
    {
      "epoch": 0.1543935020473921,
      "grad_norm": 1.2775851488113403,
      "learning_rate": 8.440251718681331e-05,
      "loss": 3.7666,
      "step": 2300
    },
    {
      "epoch": 0.16110626300597436,
      "grad_norm": 1.382295846939087,
      "learning_rate": 7.971173461797922e-05,
      "loss": 3.679,
      "step": 2400
    },
    {
      "epoch": 0.1678190239645566,
      "grad_norm": 1.269216775894165,
      "learning_rate": 7.500235624161463e-05,
      "loss": 3.7059,
      "step": 2500
    }
  ],
  "logging_steps": 100,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6499577364480000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}