kanishka
/

opt-babylm2-20-epochs_seed-42_3e-4

@@ -2,11 +2,23 @@
 library_name: transformers
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
 model-index:
 - name: opt-babylm2-20-epochs_seed-42_3e-4
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,7 +26,7 @@ should probably proofread and complete it, then remove this comment. -->
 # opt-babylm2-20-epochs_seed-42_3e-4
-This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 2.4950
 - Accuracy: 0.5193

 library_name: transformers
 tags:
 - generated_from_trainer
+datasets:
+- kanishka/babylm2-sentence-tokenized
 metrics:
 - accuracy
 model-index:
 - name: opt-babylm2-20-epochs_seed-42_3e-4
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: kanishka/babylm2-sentence-tokenized
+      type: kanishka/babylm2-sentence-tokenized
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.5192642005255711
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # opt-babylm2-20-epochs_seed-42_3e-4
+This model was trained from scratch on the kanishka/babylm2-sentence-tokenized dataset.
 It achieves the following results on the evaluation set:
 - Loss: 2.4950
 - Accuracy: 0.5193

all_results.json CHANGED Viewed

@@ -1,16 +1,16 @@
 {
     "epoch": 20.0,
-    "eval_accuracy": 0.5192785875103685,
-    "eval_loss": 2.4960787296295166,
-    "eval_runtime": 183.9254,
     "eval_samples": 70325,
-    "eval_samples_per_second": 382.356,
-    "eval_steps_per_second": 5.975,
-    "perplexity": 12.134816647007495,
     "total_flos": 1.7890534785024e+18,
-    "train_loss": 2.2675810582360225,
-    "train_runtime": 92416.4259,
     "train_samples": 684695,
-    "train_samples_per_second": 148.176,
-    "train_steps_per_second": 4.631
 }

 {
     "epoch": 20.0,
+    "eval_accuracy": 0.5192642005255711,
+    "eval_loss": 2.495042085647583,
+    "eval_runtime": 139.669,
     "eval_samples": 70325,
+    "eval_samples_per_second": 503.512,
+    "eval_steps_per_second": 7.869,
+    "perplexity": 12.12224368032957,
     "total_flos": 1.7890534785024e+18,
+    "train_loss": 2.267101509945356,
+    "train_runtime": 54453.6513,
     "train_samples": 684695,
+    "train_samples_per_second": 251.478,
+    "train_steps_per_second": 7.859
 }

eval_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
     "epoch": 20.0,
-    "eval_accuracy": 0.5192785875103685,
-    "eval_loss": 2.4960787296295166,
-    "eval_runtime": 183.9254,
     "eval_samples": 70325,
-    "eval_samples_per_second": 382.356,
-    "eval_steps_per_second": 5.975,
-    "perplexity": 12.134816647007495
 }

 {
     "epoch": 20.0,
+    "eval_accuracy": 0.5192642005255711,
+    "eval_loss": 2.495042085647583,
+    "eval_runtime": 139.669,
     "eval_samples": 70325,
+    "eval_samples_per_second": 503.512,
+    "eval_steps_per_second": 7.869,
+    "perplexity": 12.12224368032957
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 20.0,
     "total_flos": 1.7890534785024e+18,
-    "train_loss": 2.2675810582360225,
-    "train_runtime": 92416.4259,
     "train_samples": 684695,
-    "train_samples_per_second": 148.176,
-    "train_steps_per_second": 4.631
 }

 {
     "epoch": 20.0,
     "total_flos": 1.7890534785024e+18,
+    "train_loss": 2.267101509945356,
+    "train_runtime": 54453.6513,
     "train_samples": 684695,
+    "train_samples_per_second": 251.478,
+    "train_steps_per_second": 7.859
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff