Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1 |
-
---
|
2 |
-
license: apache-2.0
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
datasets:
|
4 |
+
- Skylion007/openwebtext
|
5 |
+
language:
|
6 |
+
- en
|
7 |
+
pipeline_tag: text-generation
|
8 |
+
---
|
9 |
+
### GPT trained with nanoGPT
|
10 |
+
|
11 |
+
Configs:
|
12 |
+
- batch size = 32
|
13 |
+
- bias = False
|
14 |
+
- block_size = 1024
|
15 |
+
- n heads = 8
|
16 |
+
- n layers = 6
|
17 |
+
- dropout = 0.0
|
18 |
+
- n embed = 768
|
19 |
+
- vocab size = 50304
|
20 |
+
- gradient_accumulation_steps = 1
|
21 |
+
- learning_rate = 1e-3
|
22 |
+
- iters = 7250
|
23 |
+
- lr_decay_iters = 5000
|
24 |
+
- min_lr = 1e-5
|
25 |
+
- warmup_iters = 400
|
26 |
+
- mfu = 30.45935
|
27 |
+
- train_loss = 3.89759
|
28 |
+
- val_loss = 3.91001
|