{
"act_fn": "gelu_new",
"n_layers": 2,
"d_model": 256,
"n_ctx": 2048,
"d_head": 32,
"d_mlp": -1,
"model_name": "custom",
"n_heads": 8,
"d_vocab": 50259,
"eps": 1e-05,
"use_attn_result": true,
"use_attn_scale": true,
"use_split_qkv_input": false,
"use_local_attn": false,
"from_checkpoint": false,
"init_mode": "gpt2",
"normalization_type": "LN",
"device": "cuda",
"attention_dir": "causal",
"attn_only": true,
"initializer_range": 0.05,
"init_weights": false,
"scale_attn_by_inverse_layer_idx": false,
"positional_embedding_type": "shortformer",
"shortformer_pos": true,
"final_rms": false,
"d_vocab_out": 50259,
"parallel_attn_mlp": false,
"n_params": 524288,
"use_hook_tokens": false,
"tokenizer_name": "ArthurConmy/redwood_tokenizer"
}