{
  "transformer_architecture": {
    "vocab_size": 128000,
    "vocab_file": "vocab.json",
    "hidden_size": 4608,
    "num_layers": 27,
    "num_attention_heads": 36,
    "num_local_attention_heads": 0,
    "local_attention_window_size": null,
    "rotary_embedding_base": 1000000,
    "rotary_percentage": 1.0,
    "sequence_length": 8192,
    "norm_type": "layernorm",
    "relative_position_embedding_type": "rotary_complex",
    "mlp_type": "default",
    "mlp_factor": 4.0,
    "attention_bias": true,
    "attention_qkv_in_one": false,
    "attention_num_kv_heads": 4,
    "attention_use_matmul": false,
    "mlp_bias": true,
    "key_query_norm": false,
    "weight_tying": false,
    "masked_softmax": {
      "kernel": "torch",
      "softmax_in_fp32": true,
      "scale": 1.0,
      "deterministic_flash_attn_bwd": false
    },
    "layernorm": {
      "optimization_type": "torch",
      "layernorm_epsilon": 1e-05
    },
    "precision": "bfloat16",
    "dropout_embedding": 0.0,
    "dropout_attention_probs": 0.0,
    "dropout_after_attention": 0.0,
    "dropout_after_mlp": 0.0,
    "finetunable_token_ids": [],
    "image_encoder": false,
    "dropout_image_encoder": 0.0,
    "lora_config": null
  }
}