Text Generation
scaling
{
    "transformer_architecture": {
        "vocab_size": 128000,
        "vocab_file": "vocab.json",
        "hidden_size": 4608,
        "num_layers": 27,
        "num_attention_heads": 36,
        "num_local_attention_heads": 0,
        "local_attention_window_size": null,
        "rotary_embedding_base": 1000000,
        "rotary_percentage": 1.0,
        "sequence_length": 8192,
        "norm_type": "layernorm",
        "relative_position_embedding_type": "rotary_complex",
        "mlp_type": "default",
        "mlp_factor": 4.0,
        "attention_bias": true,
        "attention_qkv_in_one": false,
        "attention_num_kv_heads": 4,
        "attention_use_matmul": false,
        "mlp_bias": true,
        "key_query_norm": false,
        "weight_tying": false,
        "masked_softmax": {
            "kernel": "torch",
            "softmax_in_fp32": true,
            "scale": 1.0,
            "deterministic_flash_attn_bwd": false
        },
        "layernorm": {
            "optimization_type": "torch",
            "layernorm_epsilon": 1e-05
        },
        "precision": "bfloat16",
        "dropout_embedding": 0.0,
        "dropout_attention_probs": 0.0,
        "dropout_after_attention": 0.0,
        "dropout_after_mlp": 0.0,
        "finetunable_token_ids": [],
        "image_encoder": false,
        "dropout_image_encoder": 0.0,
        "lora_config": null
    }
}
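The block above is the raw architecture config. As a quick sanity check, the sketch below derives a few quantities the config implies: a head dimension of 128, grouped-query attention with 9 query heads per KV head, an MLP inner size of 18432, and on the order of 7B weights. This is plain Python over the JSON only; the `config.json` file name and the rough parameter-count formula are assumptions for illustration, not something taken from the repo itself.

```python
import json

# Minimal sketch: derive a few quantities implied by the config above.
# Assumes the JSON shown here has been saved locally as "config.json".
with open("config.json") as f:
    arch = json.load(f)["transformer_architecture"]

hidden = arch["hidden_size"]                    # 4608
n_heads = arch["num_attention_heads"]           # 36
n_kv_heads = arch["attention_num_kv_heads"]     # 4
head_dim = hidden // n_heads                    # 128
mlp_inner = int(hidden * arch["mlp_factor"])    # 18432

# Grouped-query attention: each KV head is shared by several query heads.
queries_per_kv = n_heads // n_kv_heads          # 9

# Very rough parameter count (weight matrices only; biases and norms
# ignored), assuming a standard non-gated MLP as suggested by
# mlp_type == "default". This is an estimate, not the library's own math.
attn = 2 * hidden * hidden + 2 * hidden * (n_kv_heads * head_dim)  # Q, O + K, V
mlp = 2 * hidden * mlp_inner                                       # up + down
per_layer = attn + mlp
embeddings = arch["vocab_size"] * hidden * (1 if arch["weight_tying"] else 2)
total = per_layer * arch["num_layers"] + embeddings

print(f"head_dim={head_dim}, queries_per_kv={queries_per_kv}, mlp_inner={mlp_inner}")
print(f"~{total / 1e9:.1f}B parameters")
```

With the values in this file, the estimate lands at roughly 7.0B parameters, with the untied input embedding and output head (`weight_tying: false`) contributing about 1.2B of that.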