End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +640 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_opengpt_x8
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # oh_v1.3_opengpt_x8
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7300

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_opengpt_x8
 # oh_v1.3_opengpt_x8
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_opengpt_x8 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7300

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7299705147743225,
+    "eval_runtime": 26.5424,
+    "eval_samples_per_second": 279.101,
+    "eval_steps_per_second": 1.093,
+    "total_flos": 1381905727488000.0,
+    "train_loss": 0.7044297796307188,
+    "train_runtime": 5353.5806,
+    "train_samples_per_second": 78.866,
+    "train_steps_per_second": 0.154
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7299705147743225,
+    "eval_runtime": 26.5424,
+    "eval_samples_per_second": 279.101,
+    "eval_steps_per_second": 1.093
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1381905727488000.0,
+    "train_loss": 0.7044297796307188,
+    "train_runtime": 5353.5806,
+    "train_samples_per_second": 78.866,
+    "train_steps_per_second": 0.154
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,640 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 825,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03636363636363636,
+      "grad_norm": 3.2293971017711174,
+      "learning_rate": 5e-06,
+      "loss": 1.0337,
+      "step": 10
+    },
+    {
+      "epoch": 0.07272727272727272,
+      "grad_norm": 1.2461654883314972,
+      "learning_rate": 5e-06,
+      "loss": 0.9092,
+      "step": 20
+    },
+    {
+      "epoch": 0.10909090909090909,
+      "grad_norm": 1.0937923635217501,
+      "learning_rate": 5e-06,
+      "loss": 0.8658,
+      "step": 30
+    },
+    {
+      "epoch": 0.14545454545454545,
+      "grad_norm": 1.3350225945199414,
+      "learning_rate": 5e-06,
+      "loss": 0.844,
+      "step": 40
+    },
+    {
+      "epoch": 0.18181818181818182,
+      "grad_norm": 1.029425810987488,
+      "learning_rate": 5e-06,
+      "loss": 0.8249,
+      "step": 50
+    },
+    {
+      "epoch": 0.21818181818181817,
+      "grad_norm": 1.5219290967515304,
+      "learning_rate": 5e-06,
+      "loss": 0.8068,
+      "step": 60
+    },
+    {
+      "epoch": 0.2545454545454545,
+      "grad_norm": 1.6740495880819521,
+      "learning_rate": 5e-06,
+      "loss": 0.7989,
+      "step": 70
+    },
+    {
+      "epoch": 0.2909090909090909,
+      "grad_norm": 1.2973735477904815,
+      "learning_rate": 5e-06,
+      "loss": 0.7921,
+      "step": 80
+    },
+    {
+      "epoch": 0.32727272727272727,
+      "grad_norm": 0.8566363002967183,
+      "learning_rate": 5e-06,
+      "loss": 0.781,
+      "step": 90
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 0.9961672641644985,
+      "learning_rate": 5e-06,
+      "loss": 0.7745,
+      "step": 100
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.8927257959121373,
+      "learning_rate": 5e-06,
+      "loss": 0.7754,
+      "step": 110
+    },
+    {
+      "epoch": 0.43636363636363634,
+      "grad_norm": 0.6135178704985191,
+      "learning_rate": 5e-06,
+      "loss": 0.772,
+      "step": 120
+    },
+    {
+      "epoch": 0.4727272727272727,
+      "grad_norm": 0.7431505188106242,
+      "learning_rate": 5e-06,
+      "loss": 0.7686,
+      "step": 130
+    },
+    {
+      "epoch": 0.509090909090909,
+      "grad_norm": 0.7150787812569424,
+      "learning_rate": 5e-06,
+      "loss": 0.7618,
+      "step": 140
+    },
+    {
+      "epoch": 0.5454545454545454,
+      "grad_norm": 0.6352342662453642,
+      "learning_rate": 5e-06,
+      "loss": 0.7611,
+      "step": 150
+    },
+    {
+      "epoch": 0.5818181818181818,
+      "grad_norm": 0.6257901300873526,
+      "learning_rate": 5e-06,
+      "loss": 0.7569,
+      "step": 160
+    },
+    {
+      "epoch": 0.6181818181818182,
+      "grad_norm": 0.6387102446786417,
+      "learning_rate": 5e-06,
+      "loss": 0.7611,
+      "step": 170
+    },
+    {
+      "epoch": 0.6545454545454545,
+      "grad_norm": 0.5983754152683597,
+      "learning_rate": 5e-06,
+      "loss": 0.7546,
+      "step": 180
+    },
+    {
+      "epoch": 0.6909090909090909,
+      "grad_norm": 0.7480127979666656,
+      "learning_rate": 5e-06,
+      "loss": 0.7566,
+      "step": 190
+    },
+    {
+      "epoch": 0.7272727272727273,
+      "grad_norm": 0.5804396007389026,
+      "learning_rate": 5e-06,
+      "loss": 0.75,
+      "step": 200
+    },
+    {
+      "epoch": 0.7636363636363637,
+      "grad_norm": 0.682148918886327,
+      "learning_rate": 5e-06,
+      "loss": 0.7476,
+      "step": 210
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.8039336411015884,
+      "learning_rate": 5e-06,
+      "loss": 0.7462,
+      "step": 220
+    },
+    {
+      "epoch": 0.8363636363636363,
+      "grad_norm": 0.6876607052536684,
+      "learning_rate": 5e-06,
+      "loss": 0.7411,
+      "step": 230
+    },
+    {
+      "epoch": 0.8727272727272727,
+      "grad_norm": 0.6588151842699974,
+      "learning_rate": 5e-06,
+      "loss": 0.7469,
+      "step": 240
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 0.6715213794720472,
+      "learning_rate": 5e-06,
+      "loss": 0.7378,
+      "step": 250
+    },
+    {
+      "epoch": 0.9454545454545454,
+      "grad_norm": 0.5870957383826958,
+      "learning_rate": 5e-06,
+      "loss": 0.7457,
+      "step": 260
+    },
+    {
+      "epoch": 0.9818181818181818,
+      "grad_norm": 0.6643986810785624,
+      "learning_rate": 5e-06,
+      "loss": 0.7466,
+      "step": 270
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.7417545914649963,
+      "eval_runtime": 26.6218,
+      "eval_samples_per_second": 278.268,
+      "eval_steps_per_second": 1.089,
+      "step": 275
+    },
+    {
+      "epoch": 1.018181818181818,
+      "grad_norm": 0.9781239153342394,
+      "learning_rate": 5e-06,
+      "loss": 0.7136,
+      "step": 280
+    },
+    {
+      "epoch": 1.0545454545454545,
+      "grad_norm": 0.7152925984087143,
+      "learning_rate": 5e-06,
+      "loss": 0.6871,
+      "step": 290
+    },
+    {
+      "epoch": 1.0909090909090908,
+      "grad_norm": 0.6929492576277494,
+      "learning_rate": 5e-06,
+      "loss": 0.6894,
+      "step": 300
+    },
+    {
+      "epoch": 1.1272727272727272,
+      "grad_norm": 0.728764264622129,
+      "learning_rate": 5e-06,
+      "loss": 0.6935,
+      "step": 310
+    },
+    {
+      "epoch": 1.1636363636363636,
+      "grad_norm": 0.7252517543389313,
+      "learning_rate": 5e-06,
+      "loss": 0.6945,
+      "step": 320
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.6665160391388197,
+      "learning_rate": 5e-06,
+      "loss": 0.6886,
+      "step": 330
+    },
+    {
+      "epoch": 1.2363636363636363,
+      "grad_norm": 0.7161659905517039,
+      "learning_rate": 5e-06,
+      "loss": 0.6898,
+      "step": 340
+    },
+    {
+      "epoch": 1.2727272727272727,
+      "grad_norm": 0.5719039452566653,
+      "learning_rate": 5e-06,
+      "loss": 0.6934,
+      "step": 350
+    },
+    {
+      "epoch": 1.309090909090909,
+      "grad_norm": 0.6060853746189843,
+      "learning_rate": 5e-06,
+      "loss": 0.6922,
+      "step": 360
+    },
+    {
+      "epoch": 1.3454545454545455,
+      "grad_norm": 0.6563719933283224,
+      "learning_rate": 5e-06,
+      "loss": 0.6912,
+      "step": 370
+    },
+    {
+      "epoch": 1.3818181818181818,
+      "grad_norm": 0.5958006047997326,
+      "learning_rate": 5e-06,
+      "loss": 0.6904,
+      "step": 380
+    },
+    {
+      "epoch": 1.4181818181818182,
+      "grad_norm": 0.7430218105320606,
+      "learning_rate": 5e-06,
+      "loss": 0.688,
+      "step": 390
+    },
+    {
+      "epoch": 1.4545454545454546,
+      "grad_norm": 0.6322073230662588,
+      "learning_rate": 5e-06,
+      "loss": 0.6883,
+      "step": 400
+    },
+    {
+      "epoch": 1.490909090909091,
+      "grad_norm": 0.7151221978666452,
+      "learning_rate": 5e-06,
+      "loss": 0.6934,
+      "step": 410
+    },
+    {
+      "epoch": 1.5272727272727273,
+      "grad_norm": 0.6184168187218901,
+      "learning_rate": 5e-06,
+      "loss": 0.6916,
+      "step": 420
+    },
+    {
+      "epoch": 1.5636363636363635,
+      "grad_norm": 0.6280848540221795,
+      "learning_rate": 5e-06,
+      "loss": 0.6916,
+      "step": 430
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.6568705155050817,
+      "learning_rate": 5e-06,
+      "loss": 0.6856,
+      "step": 440
+    },
+    {
+      "epoch": 1.6363636363636362,
+      "grad_norm": 0.6359258851827682,
+      "learning_rate": 5e-06,
+      "loss": 0.6851,
+      "step": 450
+    },
+    {
+      "epoch": 1.6727272727272728,
+      "grad_norm": 0.710888538426671,
+      "learning_rate": 5e-06,
+      "loss": 0.6872,
+      "step": 460
+    },
+    {
+      "epoch": 1.709090909090909,
+      "grad_norm": 0.7584066029266229,
+      "learning_rate": 5e-06,
+      "loss": 0.6849,
+      "step": 470
+    },
+    {
+      "epoch": 1.7454545454545456,
+      "grad_norm": 0.5960492892442344,
+      "learning_rate": 5e-06,
+      "loss": 0.6891,
+      "step": 480
+    },
+    {
+      "epoch": 1.7818181818181817,
+      "grad_norm": 0.5629377755020811,
+      "learning_rate": 5e-06,
+      "loss": 0.6847,
+      "step": 490
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 0.589716689792314,
+      "learning_rate": 5e-06,
+      "loss": 0.6871,
+      "step": 500
+    },
+    {
+      "epoch": 1.8545454545454545,
+      "grad_norm": 0.5740509121739076,
+      "learning_rate": 5e-06,
+      "loss": 0.6888,
+      "step": 510
+    },
+    {
+      "epoch": 1.8909090909090909,
+      "grad_norm": 0.5891046247600111,
+      "learning_rate": 5e-06,
+      "loss": 0.6884,
+      "step": 520
+    },
+    {
+      "epoch": 1.9272727272727272,
+      "grad_norm": 0.6447276827053491,
+      "learning_rate": 5e-06,
+      "loss": 0.6893,
+      "step": 530
+    },
+    {
+      "epoch": 1.9636363636363636,
+      "grad_norm": 0.6935516132206995,
+      "learning_rate": 5e-06,
+      "loss": 0.6868,
+      "step": 540
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.5781509823001448,
+      "learning_rate": 5e-06,
+      "loss": 0.6841,
+      "step": 550
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7281343340873718,
+      "eval_runtime": 26.4698,
+      "eval_samples_per_second": 279.867,
+      "eval_steps_per_second": 1.096,
+      "step": 550
+    },
+    {
+      "epoch": 2.036363636363636,
+      "grad_norm": 0.7551729949207574,
+      "learning_rate": 5e-06,
+      "loss": 0.6351,
+      "step": 560
+    },
+    {
+      "epoch": 2.0727272727272728,
+      "grad_norm": 0.6070448901420726,
+      "learning_rate": 5e-06,
+      "loss": 0.6307,
+      "step": 570
+    },
+    {
+      "epoch": 2.109090909090909,
+      "grad_norm": 0.7225948313371118,
+      "learning_rate": 5e-06,
+      "loss": 0.6357,
+      "step": 580
+    },
+    {
+      "epoch": 2.1454545454545455,
+      "grad_norm": 1.3944109200671733,
+      "learning_rate": 5e-06,
+      "loss": 0.6375,
+      "step": 590
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": 1.1390572133302885,
+      "learning_rate": 5e-06,
+      "loss": 0.635,
+      "step": 600
+    },
+    {
+      "epoch": 2.2181818181818183,
+      "grad_norm": 0.7900509422330505,
+      "learning_rate": 5e-06,
+      "loss": 0.6383,
+      "step": 610
+    },
+    {
+      "epoch": 2.2545454545454544,
+      "grad_norm": 0.594871030626621,
+      "learning_rate": 5e-06,
+      "loss": 0.6321,
+      "step": 620
+    },
+    {
+      "epoch": 2.290909090909091,
+      "grad_norm": 0.665898906007086,
+      "learning_rate": 5e-06,
+      "loss": 0.6341,
+      "step": 630
+    },
+    {
+      "epoch": 2.327272727272727,
+      "grad_norm": 0.6509722897169726,
+      "learning_rate": 5e-06,
+      "loss": 0.6326,
+      "step": 640
+    },
+    {
+      "epoch": 2.3636363636363638,
+      "grad_norm": 0.6231670817005929,
+      "learning_rate": 5e-06,
+      "loss": 0.6385,
+      "step": 650
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.6425410588561774,
+      "learning_rate": 5e-06,
+      "loss": 0.6373,
+      "step": 660
+    },
+    {
+      "epoch": 2.4363636363636365,
+      "grad_norm": 0.621241338432262,
+      "learning_rate": 5e-06,
+      "loss": 0.6399,
+      "step": 670
+    },
+    {
+      "epoch": 2.4727272727272727,
+      "grad_norm": 0.6924233110335524,
+      "learning_rate": 5e-06,
+      "loss": 0.6393,
+      "step": 680
+    },
+    {
+      "epoch": 2.509090909090909,
+      "grad_norm": 0.6419114963815122,
+      "learning_rate": 5e-06,
+      "loss": 0.6405,
+      "step": 690
+    },
+    {
+      "epoch": 2.5454545454545454,
+      "grad_norm": 0.7336852368102121,
+      "learning_rate": 5e-06,
+      "loss": 0.6385,
+      "step": 700
+    },
+    {
+      "epoch": 2.581818181818182,
+      "grad_norm": 0.7922288944252411,
+      "learning_rate": 5e-06,
+      "loss": 0.6377,
+      "step": 710
+    },
+    {
+      "epoch": 2.618181818181818,
+      "grad_norm": 0.6500377491351792,
+      "learning_rate": 5e-06,
+      "loss": 0.6427,
+      "step": 720
+    },
+    {
+      "epoch": 2.6545454545454543,
+      "grad_norm": 0.6853834065254241,
+      "learning_rate": 5e-06,
+      "loss": 0.6346,
+      "step": 730
+    },
+    {
+      "epoch": 2.690909090909091,
+      "grad_norm": 0.8156333668312422,
+      "learning_rate": 5e-06,
+      "loss": 0.6414,
+      "step": 740
+    },
+    {
+      "epoch": 2.7272727272727275,
+      "grad_norm": 0.6294215183471213,
+      "learning_rate": 5e-06,
+      "loss": 0.6363,
+      "step": 750
+    },
+    {
+      "epoch": 2.7636363636363637,
+      "grad_norm": 0.8237171162592375,
+      "learning_rate": 5e-06,
+      "loss": 0.6421,
+      "step": 760
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 0.6772752476166749,
+      "learning_rate": 5e-06,
+      "loss": 0.6356,
+      "step": 770
+    },
+    {
+      "epoch": 2.8363636363636364,
+      "grad_norm": 0.7780500988065099,
+      "learning_rate": 5e-06,
+      "loss": 0.6425,
+      "step": 780
+    },
+    {
+      "epoch": 2.8727272727272726,
+      "grad_norm": 0.6862874007983163,
+      "learning_rate": 5e-06,
+      "loss": 0.6368,
+      "step": 790
+    },
+    {
+      "epoch": 2.909090909090909,
+      "grad_norm": 0.5748210856771035,
+      "learning_rate": 5e-06,
+      "loss": 0.6405,
+      "step": 800
+    },
+    {
+      "epoch": 2.9454545454545453,
+      "grad_norm": 0.6351457621560951,
+      "learning_rate": 5e-06,
+      "loss": 0.6357,
+      "step": 810
+    },
+    {
+      "epoch": 2.981818181818182,
+      "grad_norm": 0.586627253325874,
+      "learning_rate": 5e-06,
+      "loss": 0.6412,
+      "step": 820
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7299705147743225,
+      "eval_runtime": 25.9228,
+      "eval_samples_per_second": 285.772,
+      "eval_steps_per_second": 1.119,
+      "step": 825
+    },
+    {
+      "epoch": 3.0,
+      "step": 825,
+      "total_flos": 1381905727488000.0,
+      "train_loss": 0.7044297796307188,
+      "train_runtime": 5353.5806,
+      "train_samples_per_second": 78.866,
+      "train_steps_per_second": 0.154
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 825,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1381905727488000.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed