experiment:
  seed: 42
  name: gpt2_85M-bpe-text
  group: babylm-small
  dry_run: false
  offline_run: false
  evaluate_segmentation: false
  evaluate_babyslm: true
  blimp_tasks: blimp_filtered,blimp_supplement
  resume_checkpoint_path: null
  resume_run_id: null
dataset:
  name: transformersegmentation/BabyLM-phonemized
  subconfig: strict_small
  text_column: text
  is_phonemes: false
  max_age: null
tokenizer:
  name: transformersegmentation/BabyLM-BPE-ortho-tokenizer
data_preprocessing:
  max_input_length: 128
  join_utts: static
  remove_word_boundaries: false
  subsample: null
  subsample_type: examples
model:
  name: gpt2_lm
  model_kwargs:
    n_layer: 12
    n_head: 12
    n_embd: 768
    n_positions: 256
    n_inner: 3072
trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 90000
  max_training_steps: 400000
  logging_steps: 4000
  save_steps: 50000
  eval_steps: 50000