End of training

Files changed (6) hide show

README.md CHANGED Viewed

@@ -32,12 +32,12 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0006741100367675095
-- train_batch_size: 52
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 5
-- total_train_batch_size: 260
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 4
@@ -46,7 +46,7 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 2.43  | 100  | 0.9711          |
 ### Framework versions

 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.00023571124327298023
+- train_batch_size: 60
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 5
+- total_train_batch_size: 300
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 4
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| No log        | 2.79  | 100  | 1.1266          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -16,8 +16,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v",
-    "q"
   ],
   "task_type": "SEQ_2_SEQ_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q",
+    "v"
   ],
   "task_type": "SEQ_2_SEQ_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d1b2057d531f2a5429fcc79d54a8884c543e3f0e7359a624ad94a407a665485
 size 18915328

 version https://git-lfs.github.com/spec/v1
+oid sha256:40ae13711796b112ed4cc4860b5077af77d71b8202d675b792ec9d67e8ccfb54
 size 18915328

runs/Nov07_02-22-21_147bbb1e1699/events.out.tfevents.1699323745.147bbb1e1699.4282.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a4002f80d1157e39d6460587c65a92e45067304c1a8d1609bfeac18d0c83377
+size 5396

trainer_state.json CHANGED Viewed

@@ -1,36 +1,36 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.9805825242718447,
   "eval_steps": 100,
-  "global_step": 164,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 2.43,
-      "eval_loss": 0.9710711240768433,
-      "eval_runtime": 1487.3043,
-      "eval_samples_per_second": 7.203,
-      "eval_steps_per_second": 0.901,
       "step": 100
     },
     {
-      "epoch": 3.98,
-      "step": 164,
-      "total_flos": 9.889339416772608e+16,
-      "train_loss": 1.1787047967678164,
-      "train_runtime": 21689.2536,
-      "train_samples_per_second": 1.976,
-      "train_steps_per_second": 0.008
     }
   ],
   "logging_steps": 500,
-  "max_steps": 164,
   "num_train_epochs": 4,
   "save_steps": 500,
-  "total_flos": 9.889339416772608e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.910614525139665,
   "eval_steps": 100,
+  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 2.79,
+      "eval_loss": 1.1265525817871094,
+      "eval_runtime": 577.4869,
+      "eval_samples_per_second": 18.551,
+      "eval_steps_per_second": 2.32,
       "step": 100
     },
     {
+      "epoch": 3.91,
+      "step": 140,
+      "total_flos": 9.721424741872435e+16,
+      "train_loss": 1.4816603524344307,
+      "train_runtime": 4588.7784,
+      "train_samples_per_second": 9.338,
+      "train_steps_per_second": 0.031
     }
   ],
   "logging_steps": 500,
+  "max_steps": 140,
   "num_train_epochs": 4,
   "save_steps": 500,
+  "total_flos": 9.721424741872435e+16,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3afdb64a91e6b71d89a1d36421804cef1fa3e521341ea328400dcf79b3449517
 size 4536

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9d32b53d138eb2a55a3f8fe697ea898dda14eb2e0e6691f38f463e75fc31d59
 size 4536