redwood_attn_2l / config.json
ArthurConmy's picture
Try something a little new
b5b1732
raw
history blame contribute delete
824 Bytes
{
"act_fn": "gelu_new",
"n_layers": 2,
"d_model": 256,
"n_ctx": 2048,
"d_head": 32,
"d_mlp": -1,
"model_name": "custom",
"n_heads": 8,
"d_vocab": 50259,
"eps": 1e-05,
"use_attn_result": true,
"use_attn_scale": true,
"use_split_qkv_input": false,
"use_local_attn": false,
"from_checkpoint": false,
"init_mode": "gpt2",
"normalization_type": "LN",
"device": "cuda",
"attention_dir": "causal",
"attn_only": true,
"initializer_range": 0.05,
"init_weights": false,
"scale_attn_by_inverse_layer_idx": false,
"positional_embedding_type": "shortformer",
"shortformer_pos": true,
"final_rms": false,
"d_vocab_out": 50259,
"parallel_attn_mlp": false,
"n_params": 524288,
"use_hook_tokens": false,
"tokenizer_name": "ArthurConmy/redwood_tokenizer"
}