Model save

Browse files

Files changed (6)

README.md +69 -0
adapter_model.safetensors +1 -1
all_results.json +9 -0
runs/Sep29_14-41-10_5bc1778c8f32/events.out.tfevents.1727620913.5bc1778c8f32.69828.0 +2 -2
train_results.json +9 -0
trainer_state.json +722 -0

README.md ADDED Viewed

	@@ -0,0 +1,69 @@

+---
+base_model: google/gemma-7b
+datasets:
+- generator
+library_name: peft
+license: gemma
+tags:
+- trl
+- sft
+- generated_from_trainer
+model-index:
+- name: gemma7b-gpt4o_100k_classification-lora
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# gemma7b-gpt4o_100k_classification-lora
+This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the generator dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.9203
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0003
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 16
+- total_eval_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 1.3004        | 0.9990 | 478  | 1.9203          |
+### Framework versions
+- PEFT 0.13.0
+- Transformers 4.45.1
+- Pytorch 2.4.1+cu121
+- Datasets 3.0.1
+- Tokenizers 0.20.0

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6662d254c74c96818354ea85ef13c07848b4e950419c61f0998b0c7f53b79eb4
+oid sha256:c0a4671c221c07d04c0ad389118eacc3ddb3fb12ca4c1b5c8baddc1a3b35b9cd
 size 12859872

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9989550679205852,
+    "total_flos": 3.643767570437243e+17,
+    "train_loss": 4.360991338805674,
+    "train_runtime": 2613.4355,
+    "train_samples": 92634,
+    "train_samples_per_second": 2.928,
+    "train_steps_per_second": 0.183
+}

runs/Sep29_14-41-10_5bc1778c8f32/events.out.tfevents.1727620913.5bc1778c8f32.69828.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:542f123024a0c6a22093b91008168279b62434b5758c6561113c7c8c635faa77
-size 23713
+oid sha256:18f39eb3af9251d4390c29c3194a7a99a0811394a82c893ef1da371e1e77eaef
+size 27503

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9989550679205852,
+    "total_flos": 3.643767570437243e+17,
+    "train_loss": 4.360991338805674,
+    "train_runtime": 2613.4355,
+    "train_samples": 92634,
+    "train_samples_per_second": 2.928,
+    "train_steps_per_second": 0.183
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,722 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9989550679205852,
+  "eval_steps": 500,
+  "global_step": 478,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0020898641588296763,
+      "grad_norm": 216.79754638671875,
+      "learning_rate": 6.2499999999999995e-06,
+      "loss": 57.9838,
+      "step": 1
+    },
+    {
+      "epoch": 0.01044932079414838,
+      "grad_norm": 184.4412841796875,
+      "learning_rate": 3.125e-05,
+      "loss": 60.093,
+      "step": 5
+    },
+    {
+      "epoch": 0.02089864158829676,
+      "grad_norm": 107.91060638427734,
+      "learning_rate": 6.25e-05,
+      "loss": 48.3094,
+      "step": 10
+    },
+    {
+      "epoch": 0.03134796238244514,
+      "grad_norm": 17.1436710357666,
+      "learning_rate": 9.374999999999999e-05,
+      "loss": 33.2668,
+      "step": 15
+    },
+    {
+      "epoch": 0.04179728317659352,
+      "grad_norm": 12.335116386413574,
+      "learning_rate": 0.000125,
+      "loss": 27.698,
+      "step": 20
+    },
+    {
+      "epoch": 0.0522466039707419,
+      "grad_norm": 6.2943196296691895,
+      "learning_rate": 0.00015625,
+      "loss": 25.9692,
+      "step": 25
+    },
+    {
+      "epoch": 0.06269592476489028,
+      "grad_norm": 5.466517448425293,
+      "learning_rate": 0.00018749999999999998,
+      "loss": 25.2691,
+      "step": 30
+    },
+    {
+      "epoch": 0.07314524555903866,
+      "grad_norm": 9.744288444519043,
+      "learning_rate": 0.00021874999999999998,
+      "loss": 23.7082,
+      "step": 35
+    },
+    {
+      "epoch": 0.08359456635318704,
+      "grad_norm": 19.27219581604004,
+      "learning_rate": 0.00025,
+      "loss": 21.3655,
+      "step": 40
+    },
+    {
+      "epoch": 0.09404388714733543,
+      "grad_norm": 41.77222442626953,
+      "learning_rate": 0.00028125,
+      "loss": 16.1707,
+      "step": 45
+    },
+    {
+      "epoch": 0.1044932079414838,
+      "grad_norm": 18.60293960571289,
+      "learning_rate": 0.0002999839868651235,
+      "loss": 8.0969,
+      "step": 50
+    },
+    {
+      "epoch": 0.11494252873563218,
+      "grad_norm": 11.452897071838379,
+      "learning_rate": 0.00029980387835984494,
+      "loss": 4.1367,
+      "step": 55
+    },
+    {
+      "epoch": 0.12539184952978055,
+      "grad_norm": 8.422245979309082,
+      "learning_rate": 0.000299423886051382,
+      "loss": 3.1254,
+      "step": 60
+    },
+    {
+      "epoch": 0.13584117032392895,
+      "grad_norm": 2.444629669189453,
+      "learning_rate": 0.0002988445169647103,
+      "loss": 2.4463,
+      "step": 65
+    },
+    {
+      "epoch": 0.14629049111807732,
+      "grad_norm": 1.307098627090454,
+      "learning_rate": 0.0002980665441538907,
+      "loss": 2.1685,
+      "step": 70
+    },
+    {
+      "epoch": 0.15673981191222572,
+      "grad_norm": 2.10964298248291,
+      "learning_rate": 0.0002970910056705806,
+      "loss": 2.0392,
+      "step": 75
+    },
+    {
+      "epoch": 0.1671891327063741,
+      "grad_norm": 1.1905853748321533,
+      "learning_rate": 0.0002959192031789579,
+      "loss": 1.9225,
+      "step": 80
+    },
+    {
+      "epoch": 0.17763845350052246,
+      "grad_norm": 0.8916841745376587,
+      "learning_rate": 0.0002945527002189068,
+      "loss": 1.8422,
+      "step": 85
+    },
+    {
+      "epoch": 0.18808777429467086,
+      "grad_norm": 3.186051845550537,
+      "learning_rate": 0.00029299332011978107,
+      "loss": 1.748,
+      "step": 90
+    },
+    {
+      "epoch": 0.19853709508881923,
+      "grad_norm": 3.865817070007324,
+      "learning_rate": 0.00029124314356752967,
+      "loss": 1.7184,
+      "step": 95
+    },
+    {
+      "epoch": 0.2089864158829676,
+      "grad_norm": 2.8790738582611084,
+      "learning_rate": 0.0002893045058284311,
+      "loss": 1.6432,
+      "step": 100
+    },
+    {
+      "epoch": 0.219435736677116,
+      "grad_norm": 1.6771491765975952,
+      "learning_rate": 0.00028717999363313967,
+      "loss": 1.6567,
+      "step": 105
+    },
+    {
+      "epoch": 0.22988505747126436,
+      "grad_norm": 2.725285530090332,
+      "learning_rate": 0.00028487244172520246,
+      "loss": 1.6157,
+      "step": 110
+    },
+    {
+      "epoch": 0.24033437826541273,
+      "grad_norm": 2.289280652999878,
+      "learning_rate": 0.0002823849290786517,
+      "loss": 1.6148,
+      "step": 115
+    },
+    {
+      "epoch": 0.2507836990595611,
+      "grad_norm": 2.0211188793182373,
+      "learning_rate": 0.0002797207747897198,
+      "loss": 1.5858,
+      "step": 120
+    },
+    {
+      "epoch": 0.2612330198537095,
+      "grad_norm": 2.0264103412628174,
+      "learning_rate": 0.00027688353364815834,
+      "loss": 1.5708,
+      "step": 125
+    },
+    {
+      "epoch": 0.2716823406478579,
+      "grad_norm": 0.9253348112106323,
+      "learning_rate": 0.0002738769913940706,
+      "loss": 1.5481,
+      "step": 130
+    },
+    {
+      "epoch": 0.28213166144200624,
+      "grad_norm": 3.3143184185028076,
+      "learning_rate": 0.00027070515966658604,
+      "loss": 1.5535,
+      "step": 135
+    },
+    {
+      "epoch": 0.29258098223615464,
+      "grad_norm": 4.024845600128174,
+      "learning_rate": 0.0002673722706511174,
+      "loss": 1.5542,
+      "step": 140
+    },
+    {
+      "epoch": 0.30303030303030304,
+      "grad_norm": 3.718261241912842,
+      "learning_rate": 0.00026388277143234146,
+      "loss": 1.5507,
+      "step": 145
+    },
+    {
+      "epoch": 0.31347962382445144,
+      "grad_norm": 1.9526076316833496,
+      "learning_rate": 0.0002602413180604401,
+      "loss": 1.5251,
+      "step": 150
+    },
+    {
+      "epoch": 0.3239289446185998,
+      "grad_norm": 1.5725075006484985,
+      "learning_rate": 0.00025645276933851667,
+      "loss": 1.4937,
+      "step": 155
+    },
+    {
+      "epoch": 0.3343782654127482,
+      "grad_norm": 4.266882419586182,
+      "learning_rate": 0.00025252218033947993,
+      "loss": 1.4944,
+      "step": 160
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 2.6647915840148926,
+      "learning_rate": 0.0002484547956610429,
+      "loss": 1.4798,
+      "step": 165
+    },
+    {
+      "epoch": 0.3552769070010449,
+      "grad_norm": 2.0770153999328613,
+      "learning_rate": 0.0002442560424278399,
+      "loss": 1.4708,
+      "step": 170
+    },
+    {
+      "epoch": 0.3657262277951933,
+      "grad_norm": 1.8132774829864502,
+      "learning_rate": 0.00023993152304999582,
+      "loss": 1.4554,
+      "step": 175
+    },
+    {
+      "epoch": 0.3761755485893417,
+      "grad_norm": 1.9493850469589233,
+      "learning_rate": 0.00023548700774781242,
+      "loss": 1.485,
+      "step": 180
+    },
+    {
+      "epoch": 0.38662486938349006,
+      "grad_norm": 3.6726951599121094,
+      "learning_rate": 0.00023092842685254442,
+      "loss": 1.4584,
+      "step": 185
+    },
+    {
+      "epoch": 0.39707419017763845,
+      "grad_norm": 2.253319501876831,
+      "learning_rate": 0.00022626186289353913,
+      "loss": 1.4569,
+      "step": 190
+    },
+    {
+      "epoch": 0.40752351097178685,
+      "grad_norm": 3.336820125579834,
+      "learning_rate": 0.00022149354248229784,
+      "loss": 1.4334,
+      "step": 195
+    },
+    {
+      "epoch": 0.4179728317659352,
+      "grad_norm": 3.0895018577575684,
+      "learning_rate": 0.0002166298280042877,
+      "loss": 1.4203,
+      "step": 200
+    },
+    {
+      "epoch": 0.4284221525600836,
+      "grad_norm": 1.8486225605010986,
+      "learning_rate": 0.00021167720912959004,
+      "loss": 1.414,
+      "step": 205
+    },
+    {
+      "epoch": 0.438871473354232,
+      "grad_norm": 0.7216203808784485,
+      "learning_rate": 0.00020664229415371266,
+      "loss": 1.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.44932079414838033,
+      "grad_norm": 2.909454107284546,
+      "learning_rate": 0.0002015318011801192,
+      "loss": 1.3713,
+      "step": 215
+    },
+    {
+      "epoch": 0.45977011494252873,
+      "grad_norm": 1.5531753301620483,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 1.4055,
+      "step": 220
+    },
+    {
+      "epoch": 0.4702194357366771,
+      "grad_norm": 4.848015308380127,
+      "learning_rate": 0.00019111144877493873,
+      "loss": 1.435,
+      "step": 225
+    },
+    {
+      "epoch": 0.48066875653082547,
+      "grad_norm": 4.833097457885742,
+      "learning_rate": 0.00018581549325353126,
+      "loss": 1.417,
+      "step": 230
+    },
+    {
+      "epoch": 0.49111807732497387,
+      "grad_norm": 1.415703296661377,
+      "learning_rate": 0.00018047174900273435,
+      "loss": 1.4449,
+      "step": 235
+    },
+    {
+      "epoch": 0.5015673981191222,
+      "grad_norm": 0.9621894359588623,
+      "learning_rate": 0.00017508734619791966,
+      "loss": 1.3907,
+      "step": 240
+    },
+    {
+      "epoch": 0.5120167189132706,
+      "grad_norm": 2.091428279876709,
+      "learning_rate": 0.0001696694692653004,
+      "loss": 1.3581,
+      "step": 245
+    },
+    {
+      "epoch": 0.522466039707419,
+      "grad_norm": 1.3531287908554077,
+      "learning_rate": 0.00016422534729572738,
+      "loss": 1.3717,
+      "step": 250
+    },
+    {
+      "epoch": 0.5329153605015674,
+      "grad_norm": 1.8569897413253784,
+      "learning_rate": 0.0001587622443988899,
+      "loss": 1.3811,
+      "step": 255
+    },
+    {
+      "epoch": 0.5433646812957158,
+      "grad_norm": 4.248292446136475,
+      "learning_rate": 0.0001532874500107902,
+      "loss": 1.3797,
+      "step": 260
+    },
+    {
+      "epoch": 0.5538140020898642,
+      "grad_norm": 2.5460174083709717,
+      "learning_rate": 0.0001478082691674256,
+      "loss": 1.3576,
+      "step": 265
+    },
+    {
+      "epoch": 0.5642633228840125,
+      "grad_norm": 1.3485275506973267,
+      "learning_rate": 0.00014233201275765494,
+      "loss": 1.383,
+      "step": 270
+    },
+    {
+      "epoch": 0.5747126436781609,
+      "grad_norm": 1.1686965227127075,
+      "learning_rate": 0.00013686598776825563,
+      "loss": 1.3715,
+      "step": 275
+    },
+    {
+      "epoch": 0.5851619644723093,
+      "grad_norm": 1.8593087196350098,
+      "learning_rate": 0.0001314174875341878,
+      "loss": 1.3671,
+      "step": 280
+    },
+    {
+      "epoch": 0.5956112852664577,
+      "grad_norm": 1.5989689826965332,
+      "learning_rate": 0.0001259937820070732,
+      "loss": 1.3379,
+      "step": 285
+    },
+    {
+      "epoch": 0.6060606060606061,
+      "grad_norm": 3.129467248916626,
+      "learning_rate": 0.00012060210805487529,
+      "loss": 1.3436,
+      "step": 290
+    },
+    {
+      "epoch": 0.6165099268547545,
+      "grad_norm": 1.071311593055725,
+      "learning_rate": 0.00011524965980572284,
+      "loss": 1.3711,
+      "step": 295
+    },
+    {
+      "epoch": 0.6269592476489029,
+      "grad_norm": 2.8161048889160156,
+      "learning_rate": 0.00010994357904876106,
+      "loss": 1.3242,
+      "step": 300
+    },
+    {
+      "epoch": 0.6374085684430512,
+      "grad_norm": 0.9445050954818726,
+      "learning_rate": 0.00010469094570483928,
+      "loss": 1.3217,
+      "step": 305
+    },
+    {
+      "epoch": 0.6478578892371996,
+      "grad_norm": 1.53034508228302,
+      "learning_rate": 9.949876837974944e-05,
+      "loss": 1.314,
+      "step": 310
+    },
+    {
+      "epoch": 0.658307210031348,
+      "grad_norm": 1.8168761730194092,
+      "learning_rate": 9.437397501262026e-05,
+      "loss": 1.3365,
+      "step": 315
+    },
+    {
+      "epoch": 0.6687565308254964,
+      "grad_norm": 1.4955302476882935,
+      "learning_rate": 8.932340363194595e-05,
+      "loss": 1.3154,
+      "step": 320
+    },
+    {
+      "epoch": 0.6792058516196448,
+      "grad_norm": 1.2552021741867065,
+      "learning_rate": 8.435379323158218e-05,
+      "loss": 1.3366,
+      "step": 325
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 2.914289712905884,
+      "learning_rate": 7.947177477888472e-05,
+      "loss": 1.3233,
+      "step": 330
+    },
+    {
+      "epoch": 0.7001044932079414,
+      "grad_norm": 1.3406000137329102,
+      "learning_rate": 7.46838623669881e-05,
+      "loss": 1.3264,
+      "step": 335
+    },
+    {
+      "epoch": 0.7105538140020898,
+      "grad_norm": 0.9025297164916992,
+      "learning_rate": 6.999644452302975e-05,
+      "loss": 1.3197,
+      "step": 340
+    },
+    {
+      "epoch": 0.7210031347962382,
+      "grad_norm": 1.2824598550796509,
+      "learning_rate": 6.541577568391758e-05,
+      "loss": 1.3201,
+      "step": 345
+    },
+    {
+      "epoch": 0.7314524555903866,
+      "grad_norm": 0.9296241998672485,
+      "learning_rate": 6.0947967851014405e-05,
+      "loss": 1.3097,
+      "step": 350
+    },
+    {
+      "epoch": 0.741901776384535,
+      "grad_norm": 0.8738858699798584,
+      "learning_rate": 5.659898243487463e-05,
+      "loss": 1.3044,
+      "step": 355
+    },
+    {
+      "epoch": 0.7523510971786834,
+      "grad_norm": 1.8482000827789307,
+      "learning_rate": 5.237462230091467e-05,
+      "loss": 1.3108,
+      "step": 360
+    },
+    {
+      "epoch": 0.7628004179728317,
+      "grad_norm": 2.537909746170044,
+      "learning_rate": 4.8280524026630565e-05,
+      "loss": 1.3164,
+      "step": 365
+    },
+    {
+      "epoch": 0.7732497387669801,
+      "grad_norm": 1.3068586587905884,
+      "learning_rate": 4.432215038069449e-05,
+      "loss": 1.2782,
+      "step": 370
+    },
+    {
+      "epoch": 0.7836990595611285,
+      "grad_norm": 1.3742858171463013,
+      "learning_rate": 4.0504783033964645e-05,
+      "loss": 1.3179,
+      "step": 375
+    },
+    {
+      "epoch": 0.7941483803552769,
+      "grad_norm": 1.2923156023025513,
+      "learning_rate": 3.6833515512134606e-05,
+      "loss": 1.2904,
+      "step": 380
+    },
+    {
+      "epoch": 0.8045977011494253,
+      "grad_norm": 0.7867398262023926,
+      "learning_rate": 3.331324639942526e-05,
+      "loss": 1.3029,
+      "step": 385
+    },
+    {
+      "epoch": 0.8150470219435737,
+      "grad_norm": 1.1442195177078247,
+      "learning_rate": 2.9948672802388135e-05,
+      "loss": 1.3069,
+      "step": 390
+    },
+    {
+      "epoch": 0.8254963427377221,
+      "grad_norm": 1.4821033477783203,
+      "learning_rate": 2.67442840825406e-05,
+      "loss": 1.3177,
+      "step": 395
+    },
+    {
+      "epoch": 0.8359456635318704,
+      "grad_norm": 0.9633380770683289,
+      "learning_rate": 2.3704355866196373e-05,
+      "loss": 1.3249,
+      "step": 400
+    },
+    {
+      "epoch": 0.8463949843260188,
+      "grad_norm": 1.2908155918121338,
+      "learning_rate": 2.083294433948324e-05,
+      "loss": 1.3449,
+      "step": 405
+    },
+    {
+      "epoch": 0.8568443051201672,
+      "grad_norm": 1.1834619045257568,
+      "learning_rate": 1.813388083616068e-05,
+      "loss": 1.3086,
+      "step": 410
+    },
+    {
+      "epoch": 0.8672936259143156,
+      "grad_norm": 1.1399352550506592,
+      "learning_rate": 1.5610766725458834e-05,
+      "loss": 1.315,
+      "step": 415
+    },
+    {
+      "epoch": 0.877742946708464,
+      "grad_norm": 1.2300066947937012,
+      "learning_rate": 1.326696860675981e-05,
+      "loss": 1.2894,
+      "step": 420
+    },
+    {
+      "epoch": 0.8881922675026124,
+      "grad_norm": 0.9975532293319702,
+      "learning_rate": 1.1105613817532976e-05,
+      "loss": 1.2953,
+      "step": 425
+    },
+    {
+      "epoch": 0.8986415882967607,
+      "grad_norm": 0.9357336163520813,
+      "learning_rate": 9.129586260518634e-06,
+      "loss": 1.3159,
+      "step": 430
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 0.7603440880775452,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 1.2897,
+      "step": 435
+    },
+    {
+      "epoch": 0.9195402298850575,
+      "grad_norm": 0.8711851835250854,
+      "learning_rate": 5.743808522387544e-06,
+      "loss": 1.275,
+      "step": 440
+    },
+    {
+      "epoch": 0.9299895506792059,
+      "grad_norm": 0.9144044518470764,
+      "learning_rate": 4.33857599554282e-06,
+      "loss": 1.328,
+      "step": 445
+    },
+    {
+      "epoch": 0.9404388714733543,
+      "grad_norm": 0.862479567527771,
+      "learning_rate": 3.1276999815337544e-06,
+      "loss": 1.2879,
+      "step": 450
+    },
+    {
+      "epoch": 0.9508881922675027,
+      "grad_norm": 0.7352892756462097,
+      "learning_rate": 2.1127961561727193e-06,
+      "loss": 1.2873,
+      "step": 455
+    },
+    {
+      "epoch": 0.9613375130616509,
+      "grad_norm": 2.582821846008301,
+      "learning_rate": 1.2952187089419642e-06,
+      "loss": 1.3191,
+      "step": 460
+    },
+    {
+      "epoch": 0.9717868338557993,
+      "grad_norm": 0.7060139179229736,
+      "learning_rate": 6.760585360942872e-07,
+      "loss": 1.3047,
+      "step": 465
+    },
+    {
+      "epoch": 0.9822361546499477,
+      "grad_norm": 0.8089200258255005,
+      "learning_rate": 2.5614178506644934e-07,
+      "loss": 1.2743,
+      "step": 470
+    },
+    {
+      "epoch": 0.9926854754440961,
+      "grad_norm": 1.2739328145980835,
+      "learning_rate": 3.6028752148081766e-08,
+      "loss": 1.3004,
+      "step": 475
+    },
+    {
+      "epoch": 0.9989550679205852,
+      "eval_loss": 1.9203195571899414,
+      "eval_runtime": 0.8302,
+      "eval_samples_per_second": 2.409,
+      "eval_steps_per_second": 1.205,
+      "step": 478
+    },
+    {
+      "epoch": 0.9989550679205852,
+      "step": 478,
+      "total_flos": 3.643767570437243e+17,
+      "train_loss": 4.360991338805674,
+      "train_runtime": 2613.4355,
+      "train_samples_per_second": 2.928,
+      "train_steps_per_second": 0.183
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 478,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.643767570437243e+17,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}