# File size: 965 Bytes
# Commit: 889c086
# (line-number gutter from the original file viewer removed)
---
# Training configuration: small GPT-2 LM on the BabyLM strict-small corpus
# using an orthographic BPE tokenizer (text, not phonemes).
# NOTE(review): indentation was flattened in the source; nesting below is
# reconstructed from the section keys — without it the four `name:` keys
# collide at top level (duplicate keys, invalid YAML 1.2 / silent last-wins).
experiment:
  seed: 42
  name: gpt2_5M-bpe-text-dyn-03
  group: babylm-small
  dry_run: false
  offline_run: false
  evaluate_segmentation: false
  evaluate_babyslm: true
  # Comma-separated list of BLiMP task suites to evaluate on.
  blimp_tasks: blimp_filtered,blimp_supplement
  # Both null: start a fresh run rather than resuming.
  resume_checkpoint_path: null
  resume_run_id: null

dataset:
  name: transformersegmentation/BabyLM-phonemized
  subconfig: strict_small
  text_column: text
  # false: train on orthographic text even though the dataset has phonemes.
  is_phonemes: false
  max_age: null

tokenizer:
  name: transformersegmentation/BabyLM-BPE-ortho-tokenizer

data_preprocessing:
  max_input_length: 128
  # NOTE(review): 'dynamic' presumably packs utterances up to
  # max_input_length at batch time — confirm against the preprocessing code.
  join_utts: dynamic
  remove_word_boundaries: false
  # null subsample: use the full dataset; subsample_type only applies
  # when subsample is set.
  subsample: null
  subsample_type: examples

model:
  name: gpt2_lm
  # Passed through to the GPT-2 architecture constructor (~5M params).
  model_kwargs:
    n_layer: 6
    n_head: 8
    n_embd: 256
    n_positions: 256
    n_inner: 1024
    resid_pdrop: 0.3
    embd_pdrop: 0.3
    attn_pdrop: 0.3

trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 100000
  max_training_steps: 600000
  logging_steps: 6000
  save_steps: 25000
  eval_steps: 25000
|