Model save

Files changed (7)

README.md CHANGED Viewed

@@ -2,15 +2,12 @@
 license: apache-2.0
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 tags:
-- alignment-handbook
-- trl
-- sft
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- preference-data
 model-index:
 - name: prometheus-7b-direct-0.1p-rare-seed42
   results: []
@@ -21,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 # prometheus-7b-direct-0.1p-rare-seed42
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the preference-data dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.3326

 license: apache-2.0
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 datasets:
+- generator
 model-index:
 - name: prometheus-7b-direct-0.1p-rare-seed42
   results: []
 # prometheus-7b-direct-0.1p-rare-seed42
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the generator dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.3326

all_results.json CHANGED Viewed

@@ -6,9 +6,9 @@
     "eval_samples_per_second": 3.103,
     "eval_steps_per_second": 0.781,
     "total_flos": 214352422502400.0,
-    "train_loss": 0.4881494865985587,
-    "train_runtime": 23473.6027,
     "train_samples": 108847,
-    "train_samples_per_second": 1.397,
-    "train_steps_per_second": 0.044
 }

     "eval_samples_per_second": 3.103,
     "eval_steps_per_second": 0.781,
     "total_flos": 214352422502400.0,
+    "train_loss": 0.0,
+    "train_runtime": 0.0086,
     "train_samples": 108847,
+    "train_samples_per_second": 3817651.704,
+    "train_steps_per_second": 119261.58
 }

config.json CHANGED Viewed

@@ -22,6 +22,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.43.4",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.43.4",
+  "use_cache": false,
   "vocab_size": 32000
 }

runs/Aug07_15-38-10_COE-CS-sv003/events.out.tfevents.1723045127.COE-CS-sv003.3042844.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:131f5e35638ea8c0bd9d81ee1c016ba30602d17c8b71b7d01a71941f507f9d46
+size 5594

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 0.9995119570522206,
     "total_flos": 214352422502400.0,
-    "train_loss": 0.4881494865985587,
-    "train_runtime": 23473.6027,
     "train_samples": 108847,
-    "train_samples_per_second": 1.397,
-    "train_steps_per_second": 0.044
 }

 {
     "epoch": 0.9995119570522206,
     "total_flos": 214352422502400.0,
+    "train_loss": 0.0,
+    "train_runtime": 0.0086,
     "train_samples": 108847,
+    "train_samples_per_second": 3817651.704,
+    "train_steps_per_second": 119261.58
 }

trainer_state.json CHANGED Viewed

@@ -1455,10 +1455,10 @@
       "epoch": 0.9995119570522206,
       "step": 1024,
       "total_flos": 214352422502400.0,
-      "train_loss": 0.4881494865985587,
-      "train_runtime": 23473.6027,
-      "train_samples_per_second": 1.397,
-      "train_steps_per_second": 0.044
     }
   ],
   "logging_steps": 5,

       "epoch": 0.9995119570522206,
       "step": 1024,
       "total_flos": 214352422502400.0,
+      "train_loss": 0.0,
+      "train_runtime": 0.0086,
+      "train_samples_per_second": 3817651.704,
+      "train_steps_per_second": 119261.58
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef4a78f35bc06f6fedeb47ccbbc51575f24238258b43c7df467a45ea177aab6d
 size 6584

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1cba00d12aee2545a7c0b39bf822788259fedbaa6e9867267dba6441b7501c3
 size 6584