# Experiment configuration: GPT-2 LM trained on the BabyLM strict-small
# corpus, using orthographic text with a BPE tokenizer.

experiment:
  seed: 42
  name: gpt2_85M-bpe-text-03
  group: babylm-small
  dry_run: false
  offline_run: false
  evaluate_segmentation: false
  evaluate_babyslm: true                          # run BabySLM evaluation
  blimp_tasks: blimp_filtered,blimp_supplement    # BLiMP task suites to evaluate
  resume_checkpoint_path: null
  resume_run_id: null

dataset:
  name: transformersegmentation/BabyLM-phonemized
  subconfig: strict_small
  text_column: text                               # orthographic text, not phonemes
  is_phonemes: false
  max_age: null

tokenizer:
  name: transformersegmentation/BabyLM-BPE-ortho-tokenizer

data_preprocessing:
  max_input_length: 128                           # maximum sequence length in tokens
  join_utts: static
  remove_word_boundaries: false
  subsample: null
  subsample_type: examples

model:
  name: gpt2_lm
  model_kwargs:                                   # GPT-2 architecture hyperparameters
    n_layer: 12
    n_head: 12
    n_embd: 768
    n_positions: 256
    n_inner: 3072
    resid_pdrop: 0.3                              # dropout on residual connections
    embd_pdrop: 0.3                               # dropout on embeddings
    attn_pdrop: 0.3                               # dropout on attention weights

trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 90000
  max_training_steps: 400000
  logging_steps: 4000
  save_steps: 50000
  eval_steps: 50000
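
# ---------------------------------------------------------------------------
# Usage sketch (kept in comments so this file remains valid YAML). A minimal
# example of how a training script might consume this config. Assumptions:
# the file name "config.yaml" and the surrounding script are hypothetical;
# the model_kwargs keys match transformers.GPT2Config parameters (they do by
# name, but forwarding them this way is an inference, not a documented API
# of this repository).
#
#   import yaml
#   from transformers import GPT2Config, GPT2LMHeadModel
#
#   with open("config.yaml") as f:          # hypothetical path
#       cfg = yaml.safe_load(f)
#
#   # Build the model from the architecture section of the config.
#   model_config = GPT2Config(**cfg["model"]["model_kwargs"])
#   model = GPT2LMHeadModel(model_config)
#
#   lr = cfg["trainer"]["lr"]                # e.g. 0.001
# ---------------------------------------------------------------------------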