wissamantoun
committed on
Commit
•
76ae46f
1
Parent(s):
18d0924
Update run_pretraining.py and configuration_aragpt2.py
Browse files- README.md +19 -1
- configuration_aragpt2.py +1 -1
README.md
CHANGED
@@ -86,7 +86,25 @@ python create_pretraining_data.py
|
|
86 |
|
87 |
Finetuning:
|
88 |
```bash
|
89 |
-
python3 run_pretraining.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
```
|
91 |
# Model Sizes
|
92 |
|
|
|
86 |
|
87 |
Finetuning:
|
88 |
```bash
|
89 |
+
python3 run_pretraining.py \
|
90 |
+
--input_file="gs://<GS_BUCKET>/pretraining_data/*" \
|
91 |
+
--output_dir="gs://<GS_BUCKET>/pretraining_model/" \
|
92 |
+
--config_file="config/small_hparams.json" \
|
93 |
+
--batch_size=128 \
|
94 |
+
--eval_batch_size=8 \
|
95 |
+
--num_train_steps= \
|
96 |
+
--num_warmup_steps= \
|
97 |
+
--learning_rate= \
|
98 |
+
--save_checkpoints_steps= \
|
99 |
+
--max_seq_length=1024 \
|
100 |
+
--max_eval_steps= \
|
101 |
+
--optimizer="lamb" \
|
102 |
+
--iterations_per_loop=5000 \
|
103 |
+
--keep_checkpoint_max=10 \
|
104 |
+
--use_tpu=True \
|
105 |
+
--tpu_name=<TPU NAME> \
|
106 |
+
--do_train=True \
|
107 |
+
--do_eval=False
|
108 |
```
|
109 |
# Model Sizes
|
110 |
|
configuration_aragpt2.py
CHANGED
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
|
|
131 |
n_layer=12,
|
132 |
n_head=12,
|
133 |
n_inner=None,
|
134 |
-
activation_function="
|
135 |
resid_pdrop=0.1,
|
136 |
embd_pdrop=0.1,
|
137 |
attn_pdrop=0.1,
|
|
|
131 |
n_layer=12,
|
132 |
n_head=12,
|
133 |
n_inner=None,
|
134 |
+
activation_function="gelu_new",
|
135 |
resid_pdrop=0.1,
|
136 |
embd_pdrop=0.1,
|
137 |
attn_pdrop=0.1,
|