{ "n_layers": 6, "d_model": 128, "d_head": 64, "n_heads": 8, "d_mlp": 512, "d_vocab": 61, "n_ctx": 59, "act_fn": "gelu", "normalization_type": "LN", "att_only": false, "architecture": "mingpt" }