# File size: 925 Bytes
# afc7050
# Note that some of the fields in this template haven't been filled in yet.
# Please resolve any `null` fields before launching!
# Numeric precision mode for the run.
# NOTE(review): `amp_bf16` presumably selects bfloat16 automatic mixed
# precision in the training framework — confirm against the consumer.
precision: amp_bf16
# Same value as `max_position_embeddings` and `long_conv_l_max` further down;
# presumably all three must be changed together — TODO confirm.
max_seq_len: 32768
# Tokenizer for dataset creation
# Also referenced below through `${tokenizer_name}` by
# `pretrained_model_name` and the model's own `tokenizer_name`.
tokenizer_name: bert-base-uncased
# Base model config
model:
  name: bert
  # Both values are interpolated from the top-level `tokenizer_name`, so the
  # tokenizer and the model name cannot drift apart.
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  # NOTE(review): every key from here to the end of the file is assumed to
  # belong to `model_config` — confirm against the model's config class.
  model_config:
    num_attention_heads: 12
    num_hidden_layers: 12
    attention_probs_dropout_prob: 0.0
    # Same value as the top-level `max_seq_len`.
    max_position_embeddings: 32768

    # Sequence-mixer / long-convolution settings.
    monarch_mixer_sequence_mixing: true
    # Same value as the top-level `max_seq_len`.
    long_conv_l_max: 32768
    # Exponent floats carry an explicit `.0` mantissa so that YAML 1.1
    # loaders (e.g. PyYAML) read them as floats rather than strings.
    long_conv_kernel_learning_rate: 1.0e-3
    hyena_lr_pos_emb: 1.0e-5
    hyena_w: 10
    hyena_wd: 0.1
    hyena_emb_dim: 5
    hyena_filter_order: 128
    hyena_training_additions: false

    bidirectional: true
    residual_long_conv: true

    # MLP and positional-encoding switches.
    use_glu_mlp: true
    use_monarch_mlp: true
    monarch_mlp_nblocks: 4
    use_positional_encodings: true