{ "act_fn": "gelu_new", "n_layers": 2, "d_model": 256, "n_ctx": 2048, "d_head": 32, "d_mlp": -1, "model_name": "custom", "n_heads": 8, "d_vocab": 50259, "eps": 1e-05, "use_attn_result": true, "use_attn_scale": true, "use_split_qkv_input": false, "use_local_attn": false, "from_checkpoint": false, "init_mode": "gpt2", "normalization_type": "LN", "device": "cuda", "attention_dir": "causal", "attn_only": true, "initializer_range": 0.05, "init_weights": false, "scale_attn_by_inverse_layer_idx": false, "positional_embedding_type": "shortformer", "shortformer_pos": true, "final_rms": false, "d_vocab_out": 50259, "parallel_attn_mlp": false, "n_params": 524288, "use_hook_tokens": false, "tokenizer_name": "ArthurConmy/redwood_tokenizer" }