# Serialized model-argument object for a small transformer configuration.
#
# SECURITY NOTE: the `!!python/object:` tag below makes the YAML loader
# instantiate a Python class. `yaml.safe_load` rejects this tag; the file must
# be read with a loader that allows Python object construction, so only load
# it from a trusted location.
!!python/object:aether.model.model.TransformerLensModelArguments
implementation: transformer_lens
model_name: default
n_layers: 2
model_seed: 0
d_model: 256
n_ctx: 1024
# NOTE(review): d_head * n_heads = 32 * 32 = 1024, which differs from
# d_model = 256 — confirm this is intended and not a leftover from a
# different model size.
d_head: 32
n_heads: 32
act_fn: gelu
# d_vocab presumably has to match the vocabulary of `tokenizer_name` below
# (the "5k" suffix suggests 5000) — TODO confirm.
d_vocab: 5000
use_local_attn: false
tokenizer_name: georgeyw/TinyStories-tokenizer-5k
# window_size and attn_types are left null; presumably they only matter when
# use_local_attn is true — verify against the consuming class.
window_size: null
attn_types: null
attn_only: true
positional_embedding_type: shortformer