andreaskoepf
committed on
Commit
•
20cb060
1
Parent(s):
6dee96a
Update README.md
Browse files
README.md
CHANGED
@@ -9,7 +9,6 @@ checkpoint: 11000 step (2 epochs)
|
|
9 |
datasets:
|
10 |
```
|
11 |
pretrain:
|
12 |
-
num_train_epochs: 1
|
13 |
weight_decay: 0.01
|
14 |
use_custom_sampler: true
|
15 |
sort_by_length: false
|
@@ -54,4 +53,6 @@ pythia-1.4b-pretrain:
|
|
54 |
per_device_eval_batch_size: 16
|
55 |
num_train_epochs: 2
|
56 |
save_total_limit: 2
|
57 |
-
```
|
|
|
|
|
|
9 |
datasets:
|
10 |
```
|
11 |
pretrain:
|
|
|
12 |
weight_decay: 0.01
|
13 |
use_custom_sampler: true
|
14 |
sort_by_length: false
|
|
|
53 |
per_device_eval_batch_size: 16
|
54 |
num_train_epochs: 2
|
55 |
save_total_limit: 2
|
56 |
+
```
|
57 |
+
|
58 |
+
command: `deepspeed trainer_sft.py --configs defaults pretrain pythia-1.4b-pretrain --cache_dir .cache/ --output_dir .saved_models/pythia-1.4b-pre --residual_dropout 0.0 --deepspeed`
|