nyun-c2-llama3-56B / config.json
{
"_name_or_path": "meta-llama/Meta-Llama-3-70B",
"architectures": [
"LlamaForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_llama.LlamaConfig",
"AutoModelForCausalLM": "modeling_llama.LlamaForCausalLM"
},
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"eos_token_id": 128001,
"hidden_act": "silu",
"hidden_size": 8192,
"initializer_range": 0.02,
"intermediate_size": [
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
28672,
7166,
6906,
10143,
12633,
7212,
22485,
10135,
7058,
6890,
8751,
13120,
6421,
13147,
5797,
6510,
5491,
8320,
7594,
9199,
4712,
4590,
8467,
5990,
28343,
5626,
9352,
8924,
28672,
28672,
28672
],
"max_position_embeddings": 8192,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": [
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
64,
24,
24,
24,
32,
64,
32,
32,
24,
8,
16,
8,
24,
40,
64,
16,
16,
16,
32,
24,
48,
64,
64,
64,
24,
64,
64,
40,
32,
32,
24
],
"num_hidden_layers": 80,
"num_key_value_heads": [
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
3,
3,
3,
4,
8,
4,
4,
3,
1,
2,
1,
3,
5,
8,
2,
2,
2,
4,
3,
6,
8,
8,
8,
3,
8,
8,
5,
4,
4,
3
],
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 500000.0,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.41.2",
"use_cache": true,
"vocab_size": 128256,
"first_compressed_layer_idx": 50
}
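
The per-layer lists above (intermediate_size, num_attention_heads, num_key_value_heads) and the "auto_map" entries mean this checkpoint relies on the custom configuration_llama.py / modeling_llama.py files in the repository rather than the stock transformers Llama classes, so it must be loaded with trust_remote_code=True. Below is a minimal loading sketch; the repo id "nyunai/nyun-c2-llama3-56B" is an assumption used for illustration and may differ from the actual hub path.

# Minimal sketch of loading this compressed Llama-3 checkpoint.
# Assumption: repo_id below is illustrative; the custom config/model
# classes referenced in "auto_map" ship inside the same repository.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "nyunai/nyun-c2-llama3-56B"  # hypothetical repo id

# trust_remote_code is required because "auto_map" points at custom
# LlamaConfig / LlamaForCausalLM classes that accept per-layer lists for
# intermediate_size, num_attention_heads and num_key_value_heads.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    config=config,
    torch_dtype=torch.float16,  # matches "torch_dtype" in the config
    device_map="auto",          # requires accelerate; spreads the 56B model across devices
    trust_remote_code=True,
)

# Quick sanity check against the config: layers before
# "first_compressed_layer_idx" (50) keep the original Llama-3-70B shapes
# (intermediate_size 28672, 64 attention heads, 8 KV heads), while later
# layers use the reduced per-layer values listed above.
print(config.num_hidden_layers, config.first_compressed_layer_idx)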