{ "activation": "relu_squared", "bias": false, "d_model": 512, "dropout": 0.2, "hidden_dim": 2048, "mlp": "GLU", "num_heads": 32, "num_kv_heads": 0, "num_layers": 32, "seq_len": 256, "vocab_size": 50257, "weight_tying": true }