redwood_attn_2l / config.json
{
"n_layers": 2,
"d_model": 256,
"n_ctx": 2048,
"d_head": 32,
"model_name": "custom",
"n_heads": 8,
"d_vocab": 50259,
"eps": 1e-05,
"use_attn_result": True,
"use_attn_scale": True,
"use_split_qkv_input": False,
"use_local_attn": False,
"from_checkpoint": False,
"init_mode": "gpt2",
"normalization_type": "LN",
"device": "cuda",
"attention_dir": "causal",
"attn_only": True,
"initializer_range": 0.05,
"init_weights": True,
"scale_attn_by_inverse_layer_idx": False,
"positional_embedding_type": "shortformer",
"final_rms": False,
"d_vocab_out": 50259,
"parallel_attn_mlp": False,
"n_params": 524288,
"use_hook_tokens": False,
}
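
These field names match TransformerLens's HookedTransformerConfig (for example use_attn_result and the "shortformer" positional_embedding_type), and the n_params value is consistent with an attention-only model: 2 layers × 4 projection matrices (W_Q, W_K, W_V, W_O) × 256 × 256 = 524,288 parameters, embeddings excluded. A minimal loading sketch, assuming a TransformerLens environment; the checkpoint filename is illustrative, not taken from this repo:

```python
import json

import torch
from transformer_lens import HookedTransformer, HookedTransformerConfig

# Parse the config shown above (path is illustrative).
with open("config.json") as f:
    cfg_dict = json.load(f)

# The config pins "device": "cuda"; fall back to CPU when no GPU is available.
if not torch.cuda.is_available():
    cfg_dict["device"] = "cpu"

cfg = HookedTransformerConfig.from_dict(cfg_dict)
model = HookedTransformer(cfg)  # weights are randomly initialised at this point

# The trained weights would then come from the repo's checkpoint file
# (filename is an assumption, not given on this page):
# state_dict = torch.load("model.pt", map_location=cfg.device)
# model.load_state_dict(state_dict)
```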