End of training

Browse files

Files changed (8) hide show

README.md +2 -2
adapter_config.json +8 -2
adapter_model.bin +2 -2
adapter_model.safetensors +2 -2
all_results.json +6 -6
train_results.json +6 -6
trainer_state.json +119 -17
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -34,13 +34,13 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 1
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
-- training_steps: 3
 - mixed_precision_training: Native AMP
 ### Training results

 The following hyperparameters were used during training:
 - learning_rate: 0.0002
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
+- training_steps: 20
 - mixed_precision_training: Native AMP
 ### Training results

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -16,7 +16,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "base_layer"
   ],
   "task_type": "SEQ_CLS"
 }

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "o_proj",
+    "v_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj",
+    "q_proj",
+    "gate_proj"
   ],
   "task_type": "SEQ_CLS"
 }

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11d1c82228579d983d2866efc6c53db9e6f4f7fa8c9b39ec57bf88c0994264ab
-size 320373198

 version https://git-lfs.github.com/spec/v1
+oid sha256:09c2b7002ff788d88d15bd99682d6628ff38f37610acdc8fc2a94ba41d5fb965
+size 160283150

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d403c5891130d9cf0429788d8aed07ac05883ec05cb5d5eb4c6cd826bc7089b
-size 320169624

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb6d31e55d2ba6ceda27724d1db9bd07caa18ffbb27cc008f54b0abe21e0dffa
+size 160180976

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.0,
-    "total_flos": 32571857534976.0,
-    "train_loss": 6.678385416666667,
-    "train_runtime": 6.9231,
-    "train_samples_per_second": 0.433,
-    "train_steps_per_second": 0.433
 }

 {
+    "epoch": 0.12,
+    "total_flos": 825752514723840.0,
+    "train_loss": 0.38747100830078124,
+    "train_runtime": 30.2879,
+    "train_samples_per_second": 2.641,
+    "train_steps_per_second": 0.66
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.0,
-    "total_flos": 32571857534976.0,
-    "train_loss": 6.678385416666667,
-    "train_runtime": 6.9231,
-    "train_samples_per_second": 0.433,
-    "train_steps_per_second": 0.433
 }

 {
+    "epoch": 0.12,
+    "total_flos": 825752514723840.0,
+    "train_loss": 0.38747100830078124,
+    "train_runtime": 30.2879,
+    "train_samples_per_second": 2.641,
+    "train_steps_per_second": 0.66
 }

trainer_state.json CHANGED Viewed

@@ -1,46 +1,148 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.004594180704441042,
   "eval_steps": 500,
-  "global_step": 3,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0,
       "learning_rate": 0.0,
-      "loss": 6.625,
       "step": 1
     },
     {
-      "epoch": 0.0,
       "learning_rate": 0.0,
-      "loss": 6.1719,
       "step": 2
     },
     {
-      "epoch": 0.0,
       "learning_rate": 0.0,
-      "loss": 7.2383,
       "step": 3
     },
     {
-      "epoch": 0.0,
-      "step": 3,
-      "total_flos": 32571857534976.0,
-      "train_loss": 6.678385416666667,
-      "train_runtime": 6.9231,
-      "train_samples_per_second": 0.433,
-      "train_steps_per_second": 0.433
     }
   ],
   "logging_steps": 1,
-  "max_steps": 3,
   "num_train_epochs": 1,
   "save_steps": 500,
-  "total_flos": 32571857534976.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.12195121951219512,
   "eval_steps": 500,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.01,
       "learning_rate": 0.0,
+      "loss": 1.1529,
       "step": 1
     },
     {
+      "epoch": 0.01,
       "learning_rate": 0.0,
+      "loss": 1.7234,
       "step": 2
     },
     {
+      "epoch": 0.02,
       "learning_rate": 0.0,
+      "loss": 1.1025,
       "step": 3
     },
     {
+      "epoch": 0.02,
+      "learning_rate": 0.0,
+      "loss": 0.8796,
+      "step": 4
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.0,
+      "loss": 0.9621,
+      "step": 5
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0001,
+      "loss": 1.2671,
+      "step": 6
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.0002,
+      "loss": 0.6072,
+      "step": 7
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00018888888888888888,
+      "loss": 0.0546,
+      "step": 8
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00017777777777777779,
+      "loss": 0.0,
+      "step": 9
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.0001666666666666667,
+      "loss": 0.0,
+      "step": 10
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00015555555555555556,
+      "loss": 0.0,
+      "step": 11
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 0.00014444444444444444,
+      "loss": 0.0,
+      "step": 12
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00013333333333333334,
+      "loss": 0.0,
+      "step": 13
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00012222222222222224,
+      "loss": 0.0,
+      "step": 14
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00011111111111111112,
+      "loss": 0.0,
+      "step": 15
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 16
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 8.888888888888889e-05,
+      "loss": 0.0,
+      "step": 17
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 7.777777777777778e-05,
+      "loss": 0.0,
+      "step": 18
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.0,
+      "step": 19
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 5.555555555555556e-05,
+      "loss": 0.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.12,
+      "step": 20,
+      "total_flos": 825752514723840.0,
+      "train_loss": 0.38747100830078124,
+      "train_runtime": 30.2879,
+      "train_samples_per_second": 2.641,
+      "train_steps_per_second": 0.66
     }
   ],
   "logging_steps": 1,
+  "max_steps": 20,
   "num_train_epochs": 1,
   "save_steps": 500,
+  "total_flos": 825752514723840.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e25ab091eda0fcef4e0a9e37f22496431e57d92ab798a86fa7a60f8d8ed78a5d
 size 4600

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c98373fb85d7bc426671be530b1abe4c64f097fe2b70dadd3f279a36bcf6587
 size 4600