kanishka committed on
Commit
c770f39
1 Parent(s): 6c2f4a4

End of training

Browse files
Files changed (5) hide show
  1. README.md +14 -2
  2. all_results.json +10 -10
  3. eval_results.json +6 -6
  4. train_results.json +4 -4
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -2,11 +2,23 @@
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
 
 
5
  metrics:
6
  - accuracy
7
  model-index:
8
  - name: opt-babylm2-20-epochs_seed-42_3e-4
9
- results: []
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,7 +26,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # opt-babylm2-20-epochs_seed-42_3e-4
16
 
17
- This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 2.4950
20
  - Accuracy: 0.5193
 
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
+ datasets:
6
+ - kanishka/babylm2-sentence-tokenized
7
  metrics:
8
  - accuracy
9
  model-index:
10
  - name: opt-babylm2-20-epochs_seed-42_3e-4
11
+ results:
12
+ - task:
13
+ name: Causal Language Modeling
14
+ type: text-generation
15
+ dataset:
16
+ name: kanishka/babylm2-sentence-tokenized
17
+ type: kanishka/babylm2-sentence-tokenized
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.5192642005255711
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # opt-babylm2-20-epochs_seed-42_3e-4
28
 
29
+ This model was trained from scratch on the kanishka/babylm2-sentence-tokenized dataset.
30
  It achieves the following results on the evaluation set:
31
  - Loss: 2.4950
32
  - Accuracy: 0.5193
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.5192785875103685,
4
- "eval_loss": 2.4960787296295166,
5
- "eval_runtime": 183.9254,
6
  "eval_samples": 70325,
7
- "eval_samples_per_second": 382.356,
8
- "eval_steps_per_second": 5.975,
9
- "perplexity": 12.134816647007495,
10
  "total_flos": 1.7890534785024e+18,
11
- "train_loss": 2.2675810582360225,
12
- "train_runtime": 92416.4259,
13
  "train_samples": 684695,
14
- "train_samples_per_second": 148.176,
15
- "train_steps_per_second": 4.631
16
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.5192642005255711,
4
+ "eval_loss": 2.495042085647583,
5
+ "eval_runtime": 139.669,
6
  "eval_samples": 70325,
7
+ "eval_samples_per_second": 503.512,
8
+ "eval_steps_per_second": 7.869,
9
+ "perplexity": 12.12224368032957,
10
  "total_flos": 1.7890534785024e+18,
11
+ "train_loss": 2.267101509945356,
12
+ "train_runtime": 54453.6513,
13
  "train_samples": 684695,
14
+ "train_samples_per_second": 251.478,
15
+ "train_steps_per_second": 7.859
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.5192785875103685,
4
- "eval_loss": 2.4960787296295166,
5
- "eval_runtime": 183.9254,
6
  "eval_samples": 70325,
7
- "eval_samples_per_second": 382.356,
8
- "eval_steps_per_second": 5.975,
9
- "perplexity": 12.134816647007495
10
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.5192642005255711,
4
+ "eval_loss": 2.495042085647583,
5
+ "eval_runtime": 139.669,
6
  "eval_samples": 70325,
7
+ "eval_samples_per_second": 503.512,
8
+ "eval_steps_per_second": 7.869,
9
+ "perplexity": 12.12224368032957
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 1.7890534785024e+18,
4
- "train_loss": 2.2675810582360225,
5
- "train_runtime": 92416.4259,
6
  "train_samples": 684695,
7
- "train_samples_per_second": 148.176,
8
- "train_steps_per_second": 4.631
9
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 1.7890534785024e+18,
4
+ "train_loss": 2.267101509945356,
5
+ "train_runtime": 54453.6513,
6
  "train_samples": 684695,
7
+ "train_samples_per_second": 251.478,
8
+ "train_steps_per_second": 7.859
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff