experiment:
  seed: 42
  name: gpt2_5M-bpe-text-dyn
  group: babylm-small
  dry_run: false
  offline_run: false
  evaluate_segmentation: false
  evaluate_babyslm: true
  blimp_tasks: blimp_filtered,blimp_supplement
  resume_checkpoint_path: null
  resume_run_id: null

dataset:
  name: transformersegmentation/BabyLM-phonemized
  subconfig: strict_small
  text_column: text
  is_phonemes: false
  max_age: null

tokenizer:
  name: transformersegmentation/BabyLM-BPE-ortho-tokenizer

data_preprocessing:
  max_input_length: 128
  join_utts: dynamic
  remove_word_boundaries: false
  subsample: null
  subsample_type: examples

model:
  name: gpt2_lm
  model_kwargs:
    n_layer: 6
    n_head: 8
    n_embd: 256
    n_positions: 256
    n_inner: 1024

trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 50000
  max_training_steps: 200000
  logging_steps: 2000
  save_steps: 25000
  eval_steps: 25000
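
# A minimal sketch (kept as comments so this file remains valid YAML) of how
# the dataset, tokenizer, and model sections above could be materialized,
# assuming the training code uses the Hugging Face `datasets` and
# `transformers` libraries. This is an illustration of how the keys map onto
# those APIs, not code confirmed by this config:
#
#   from datasets import load_dataset
#   from transformers import AutoTokenizer, GPT2Config, GPT2LMHeadModel
#
#   # dataset.name + dataset.subconfig -> a Hub dataset with a named config
#   ds = load_dataset("transformersegmentation/BabyLM-phonemized", "strict_small")
#
#   # tokenizer.name -> a pretrained BPE tokenizer pulled from the Hub
#   tok = AutoTokenizer.from_pretrained(
#       "transformersegmentation/BabyLM-BPE-ortho-tokenizer"
#   )
#
#   # model.model_kwargs map one-to-one onto GPT2Config fields; taking the
#   # vocab size from the tokenizer is an assumption, not stated in this file
#   cfg = GPT2Config(
#       vocab_size=len(tok),
#       n_layer=6,
#       n_head=8,
#       n_embd=256,
#       n_positions=256,
#       n_inner=1024,
#   )
#   model = GPT2LMHeadModel(cfg)  # randomly initialized, trained from scratch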