illian01 committed on
Commit
3f3de7f
1 Parent(s): 6313a33

Add weights and config

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q4f16_1",
5
+ "model_config": {
6
+ "hidden_size": 4096,
7
+ "intermediate_size": 11008,
8
+ "num_attention_heads": 32,
9
+ "num_hidden_layers": 17,
10
+ "rms_norm_eps": 1e-06,
11
+ "vocab_size": 32000,
12
+ "position_embedding_base": 10000,
13
+ "context_window_size": 2048,
14
+ "prefill_chunk_size": 2048,
15
+ "num_key_value_heads": 32,
16
+ "head_dim": 128,
17
+ "tensor_parallel_shards": 1,
18
+ "max_batch_size": 80
19
+ },
20
+ "vocab_size": 32000,
21
+ "context_window_size": 2048,
22
+ "sliding_window_size": -1,
23
+ "prefill_chunk_size": 2048,
24
+ "attention_sink_size": -1,
25
+ "tensor_parallel_shards": 1,
26
+ "mean_gen_len": 128,
27
+ "max_gen_len": 512,
28
+ "shift_fill_factor": 0.3,
29
+ "temperature": 0.6,
30
+ "presence_penalty": 0.0,
31
+ "frequency_penalty": 0.0,
32
+ "repetition_penalty": 1.0,
33
+ "top_p": 0.9,
34
+ "tokenizer_files": [
35
+ "tokenizer.model",
36
+ "tokenizer_config.json",
37
+ "tokenizer.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_fallback",
41
+ "prepend_space_in_encode": true,
42
+ "strip_space_in_decode": true
43
+ },
44
+ "conv_template": {
45
+ "name": "st-llm",
46
+ "system_template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{system_message}</s>",
47
+ "system_message": "You are a helpful, respectful and honest assistant.",
48
+ "system_prefix_token_ids": null,
49
+ "add_role_after_system_message": true,
50
+ "roles": {
51
+ "user": "### Input:",
52
+ "assistant": "### Response:"
53
+ },
54
+ "role_templates": {
55
+ "user": "{user_message}",
56
+ "assistant": "{assistant_message}",
57
+ "tool": "{tool_message}"
58
+ },
59
+ "messages": [],
60
+ "seps": [
61
+ "</s>"
62
+ ],
63
+ "role_content_sep": "\n",
64
+ "role_empty_sep": "\n",
65
+ "stop_str": [
66
+ "</s>"
67
+ ],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 0,
75
+ "bos_token_id": 1,
76
+ "eos_token_id": 2
77
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 175,
4
+ "ParamBytes": 2082955264.0,
5
+ "BitsPerParam": 4.50044525764654
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 65536000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 512
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 65536000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "0c80f07c0e705a2e2c1cd1d241a4b609"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33357824,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_scale",
34
+ "shape": [
35
+ 32000,
36
+ 128
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 8192000,
41
+ "byteOffset": 0
42
+ },
43
+ {
44
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
45
+ "shape": [
46
+ 12288,
47
+ 512
48
+ ],
49
+ "dtype": "uint32",
50
+ "format": "f32-to-bf16",
51
+ "nbytes": 25165824,
52
+ "byteOffset": 8192000
53
+ }
54
+ ],
55
+ "md5sum": "7cb2cf4d089efc4e8f0398043ee2d587"
56
+ },
57
+ {
58
+ "dataPath": "params_shard_2.bin",
59
+ "format": "raw-shard",
60
+ "nbytes": 45088768,
61
+ "records": [
62
+ {
63
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
64
+ "shape": [
65
+ 22016,
66
+ 512
67
+ ],
68
+ "dtype": "uint32",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 45088768,
71
+ "byteOffset": 0
72
+ }
73
+ ],
74
+ "md5sum": "caa1243424708896db6a17dfe758e06b"
75
+ },
76
+ {
77
+ "dataPath": "params_shard_3.bin",
78
+ "format": "raw-shard",
79
+ "nbytes": 22544384,
80
+ "records": [
81
+ {
82
+ "name": "model.layers.0.mlp.down_proj.q_weight",
83
+ "shape": [
84
+ 4096,
85
+ 1376
86
+ ],
87
+ "dtype": "uint32",
88
+ "format": "f32-to-bf16",
89
+ "nbytes": 22544384,
90
+ "byteOffset": 0
91
+ }
92
+ ],
93
+ "md5sum": "11240ff24505ab642aabdf77448f3d25"
94
+ },
95
+ {
96
+ "dataPath": "params_shard_4.bin",
97
+ "format": "raw-shard",
98
+ "nbytes": 25165824,
99
+ "records": [
100
+ {
101
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
102
+ "shape": [
103
+ 12288,
104
+ 512
105
+ ],
106
+ "dtype": "uint32",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 25165824,
109
+ "byteOffset": 0
110
+ }
111
+ ],
112
+ "md5sum": "c6db109227d5e3f1659a6641f04510cf"
113
+ },
114
+ {
115
+ "dataPath": "params_shard_5.bin",
116
+ "format": "raw-shard",
117
+ "nbytes": 32587776,
118
+ "records": [
119
+ {
120
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
121
+ "shape": [
122
+ 12288,
123
+ 128
124
+ ],
125
+ "dtype": "float16",
126
+ "format": "f32-to-bf16",
127
+ "nbytes": 3145728,
128
+ "byteOffset": 0
129
+ },
130
+ {
131
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
132
+ "shape": [
133
+ 4096,
134
+ 512
135
+ ],
136
+ "dtype": "uint32",
137
+ "format": "f32-to-bf16",
138
+ "nbytes": 8388608,
139
+ "byteOffset": 3145728
140
+ },
141
+ {
142
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
143
+ "shape": [
144
+ 4096,
145
+ 128
146
+ ],
147
+ "dtype": "float16",
148
+ "format": "f32-to-bf16",
149
+ "nbytes": 1048576,
150
+ "byteOffset": 11534336
151
+ },
152
+ {
153
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
154
+ "shape": [
155
+ 22016,
156
+ 128
157
+ ],
158
+ "dtype": "float16",
159
+ "format": "f32-to-bf16",
160
+ "nbytes": 5636096,
161
+ "byteOffset": 12582912
162
+ },
163
+ {
164
+ "name": "model.layers.0.mlp.down_proj.q_scale",
165
+ "shape": [
166
+ 4096,
167
+ 344
168
+ ],
169
+ "dtype": "float16",
170
+ "format": "f32-to-bf16",
171
+ "nbytes": 2818048,
172
+ "byteOffset": 18219008
173
+ },
174
+ {
175
+ "name": "model.layers.0.input_layernorm.weight",
176
+ "shape": [
177
+ 4096
178
+ ],
179
+ "dtype": "float16",
180
+ "format": "f32-to-bf16",
181
+ "nbytes": 8192,
182
+ "byteOffset": 21037056
183
+ },
184
+ {
185
+ "name": "model.layers.0.post_attention_layernorm.weight",
186
+ "shape": [
187
+ 4096
188
+ ],
189
+ "dtype": "float16",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 8192,
192
+ "byteOffset": 21045248
193
+ },
194
+ {
195
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
196
+ "shape": [
197
+ 12288,
198
+ 128
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 3145728,
203
+ "byteOffset": 21053440
204
+ },
205
+ {
206
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
207
+ "shape": [
208
+ 4096,
209
+ 512
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 8388608,
214
+ "byteOffset": 24199168
215
+ }
216
+ ],
217
+ "md5sum": "180a418bc044d5be3e3e3cbd969b7ac8"
218
+ },
219
+ {
220
+ "dataPath": "params_shard_6.bin",
221
+ "format": "raw-shard",
222
+ "nbytes": 45088768,
223
+ "records": [
224
+ {
225
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
226
+ "shape": [
227
+ 22016,
228
+ 512
229
+ ],
230
+ "dtype": "uint32",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 45088768,
233
+ "byteOffset": 0
234
+ }
235
+ ],
236
+ "md5sum": "21d1038acf0a42100d056bd7934e7114"
237
+ },
238
+ {
239
+ "dataPath": "params_shard_7.bin",
240
+ "format": "raw-shard",
241
+ "nbytes": 25165824,
242
+ "records": [
243
+ {
244
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
245
+ "shape": [
246
+ 12288,
247
+ 512
248
+ ],
249
+ "dtype": "uint32",
250
+ "format": "f32-to-bf16",
251
+ "nbytes": 25165824,
252
+ "byteOffset": 0
253
+ }
254
+ ],
255
+ "md5sum": "74b85fe2af8fe0bd74a60cdef00dab56"
256
+ },
257
+ {
258
+ "dataPath": "params_shard_8.bin",
259
+ "format": "raw-shard",
260
+ "nbytes": 32063488,
261
+ "records": [
262
+ {
263
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
264
+ "shape": [
265
+ 4096,
266
+ 128
267
+ ],
268
+ "dtype": "float16",
269
+ "format": "f32-to-bf16",
270
+ "nbytes": 1048576,
271
+ "byteOffset": 0
272
+ },
273
+ {
274
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
275
+ "shape": [
276
+ 22016,
277
+ 128
278
+ ],
279
+ "dtype": "float16",
280
+ "format": "f32-to-bf16",
281
+ "nbytes": 5636096,
282
+ "byteOffset": 1048576
283
+ },
284
+ {
285
+ "name": "model.layers.1.mlp.down_proj.q_weight",
286
+ "shape": [
287
+ 4096,
288
+ 1376
289
+ ],
290
+ "dtype": "uint32",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 22544384,
293
+ "byteOffset": 6684672
294
+ },
295
+ {
296
+ "name": "model.layers.1.mlp.down_proj.q_scale",
297
+ "shape": [
298
+ 4096,
299
+ 344
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 2818048,
304
+ "byteOffset": 29229056
305
+ },
306
+ {
307
+ "name": "model.layers.1.input_layernorm.weight",
308
+ "shape": [
309
+ 4096
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 8192,
314
+ "byteOffset": 32047104
315
+ },
316
+ {
317
+ "name": "model.layers.1.post_attention_layernorm.weight",
318
+ "shape": [
319
+ 4096
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 8192,
324
+ "byteOffset": 32055296
325
+ }
326
+ ],
327
+ "md5sum": "56643a500f2732bdee21c975ff084ae6"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_9.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 45088768,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
336
+ "shape": [
337
+ 22016,
338
+ 512
339
+ ],
340
+ "dtype": "uint32",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 45088768,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "f2f2ef6beb194aa2d8a974a4f83c1a1c"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_10.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 22544384,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.2.mlp.down_proj.q_weight",
355
+ "shape": [
356
+ 4096,
357
+ 1376
358
+ ],
359
+ "dtype": "uint32",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 22544384,
362
+ "byteOffset": 0
363
+ }
364
+ ],
365
+ "md5sum": "fc02a588f3f4fc8a9d61294c88150097"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_11.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 25165824,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
374
+ "shape": [
375
+ 12288,
376
+ 512
377
+ ],
378
+ "dtype": "uint32",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 25165824,
381
+ "byteOffset": 0
382
+ }
383
+ ],
384
+ "md5sum": "2f8c920884c554d1c068bd586bbaf716"
385
+ },
386
+ {
387
+ "dataPath": "params_shard_12.bin",
388
+ "format": "raw-shard",
389
+ "nbytes": 32587776,
390
+ "records": [
391
+ {
392
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
393
+ "shape": [
394
+ 12288,
395
+ 128
396
+ ],
397
+ "dtype": "float16",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 3145728,
400
+ "byteOffset": 0
401
+ },
402
+ {
403
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
404
+ "shape": [
405
+ 4096,
406
+ 512
407
+ ],
408
+ "dtype": "uint32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 8388608,
411
+ "byteOffset": 3145728
412
+ },
413
+ {
414
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
415
+ "shape": [
416
+ 4096,
417
+ 128
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 1048576,
422
+ "byteOffset": 11534336
423
+ },
424
+ {
425
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
426
+ "shape": [
427
+ 22016,
428
+ 128
429
+ ],
430
+ "dtype": "float16",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 5636096,
433
+ "byteOffset": 12582912
434
+ },
435
+ {
436
+ "name": "model.layers.2.mlp.down_proj.q_scale",
437
+ "shape": [
438
+ 4096,
439
+ 344
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 2818048,
444
+ "byteOffset": 18219008
445
+ },
446
+ {
447
+ "name": "model.layers.2.input_layernorm.weight",
448
+ "shape": [
449
+ 4096
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 8192,
454
+ "byteOffset": 21037056
455
+ },
456
+ {
457
+ "name": "model.layers.2.post_attention_layernorm.weight",
458
+ "shape": [
459
+ 4096
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 8192,
464
+ "byteOffset": 21045248
465
+ },
466
+ {
467
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
468
+ "shape": [
469
+ 12288,
470
+ 128
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 3145728,
475
+ "byteOffset": 21053440
476
+ },
477
+ {
478
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
479
+ "shape": [
480
+ 4096,
481
+ 512
482
+ ],
483
+ "dtype": "uint32",
484
+ "format": "f32-to-bf16",
485
+ "nbytes": 8388608,
486
+ "byteOffset": 24199168
487
+ }
488
+ ],
489
+ "md5sum": "966951e3ee637b3d25f2ccf5c33962ee"
490
+ },
491
+ {
492
+ "dataPath": "params_shard_13.bin",
493
+ "format": "raw-shard",
494
+ "nbytes": 45088768,
495
+ "records": [
496
+ {
497
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
498
+ "shape": [
499
+ 22016,
500
+ 512
501
+ ],
502
+ "dtype": "uint32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 45088768,
505
+ "byteOffset": 0
506
+ }
507
+ ],
508
+ "md5sum": "feb8d01187dde104bbc36b0015648647"
509
+ },
510
+ {
511
+ "dataPath": "params_shard_14.bin",
512
+ "format": "raw-shard",
513
+ "nbytes": 25165824,
514
+ "records": [
515
+ {
516
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
517
+ "shape": [
518
+ 12288,
519
+ 512
520
+ ],
521
+ "dtype": "uint32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 25165824,
524
+ "byteOffset": 0
525
+ }
526
+ ],
527
+ "md5sum": "cc381114da3cc1b73f351e768d9c78d0"
528
+ },
529
+ {
530
+ "dataPath": "params_shard_15.bin",
531
+ "format": "raw-shard",
532
+ "nbytes": 32063488,
533
+ "records": [
534
+ {
535
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
536
+ "shape": [
537
+ 4096,
538
+ 128
539
+ ],
540
+ "dtype": "float16",
541
+ "format": "f32-to-bf16",
542
+ "nbytes": 1048576,
543
+ "byteOffset": 0
544
+ },
545
+ {
546
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
547
+ "shape": [
548
+ 22016,
549
+ 128
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 5636096,
554
+ "byteOffset": 1048576
555
+ },
556
+ {
557
+ "name": "model.layers.3.mlp.down_proj.q_weight",
558
+ "shape": [
559
+ 4096,
560
+ 1376
561
+ ],
562
+ "dtype": "uint32",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 22544384,
565
+ "byteOffset": 6684672
566
+ },
567
+ {
568
+ "name": "model.layers.3.mlp.down_proj.q_scale",
569
+ "shape": [
570
+ 4096,
571
+ 344
572
+ ],
573
+ "dtype": "float16",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 2818048,
576
+ "byteOffset": 29229056
577
+ },
578
+ {
579
+ "name": "model.layers.3.input_layernorm.weight",
580
+ "shape": [
581
+ 4096
582
+ ],
583
+ "dtype": "float16",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 8192,
586
+ "byteOffset": 32047104
587
+ },
588
+ {
589
+ "name": "model.layers.3.post_attention_layernorm.weight",
590
+ "shape": [
591
+ 4096
592
+ ],
593
+ "dtype": "float16",
594
+ "format": "f32-to-bf16",
595
+ "nbytes": 8192,
596
+ "byteOffset": 32055296
597
+ }
598
+ ],
599
+ "md5sum": "9c4f4401e504ff51a8670de6b8cd10c9"
600
+ },
601
+ {
602
+ "dataPath": "params_shard_16.bin",
603
+ "format": "raw-shard",
604
+ "nbytes": 45088768,
605
+ "records": [
606
+ {
607
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
608
+ "shape": [
609
+ 22016,
610
+ 512
611
+ ],
612
+ "dtype": "uint32",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 45088768,
615
+ "byteOffset": 0
616
+ }
617
+ ],
618
+ "md5sum": "969a13b8bd14af892f1a47de6f3bdf6f"
619
+ },
620
+ {
621
+ "dataPath": "params_shard_17.bin",
622
+ "format": "raw-shard",
623
+ "nbytes": 22544384,
624
+ "records": [
625
+ {
626
+ "name": "model.layers.4.mlp.down_proj.q_weight",
627
+ "shape": [
628
+ 4096,
629
+ 1376
630
+ ],
631
+ "dtype": "uint32",
632
+ "format": "f32-to-bf16",
633
+ "nbytes": 22544384,
634
+ "byteOffset": 0
635
+ }
636
+ ],
637
+ "md5sum": "bbd77d563bda4d127ad7a44a5234c72e"
638
+ },
639
+ {
640
+ "dataPath": "params_shard_18.bin",
641
+ "format": "raw-shard",
642
+ "nbytes": 25165824,
643
+ "records": [
644
+ {
645
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
646
+ "shape": [
647
+ 12288,
648
+ 512
649
+ ],
650
+ "dtype": "uint32",
651
+ "format": "f32-to-bf16",
652
+ "nbytes": 25165824,
653
+ "byteOffset": 0
654
+ }
655
+ ],
656
+ "md5sum": "2f42b0fb6f06cdd44054e42926064765"
657
+ },
658
+ {
659
+ "dataPath": "params_shard_19.bin",
660
+ "format": "raw-shard",
661
+ "nbytes": 32587776,
662
+ "records": [
663
+ {
664
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
665
+ "shape": [
666
+ 12288,
667
+ 128
668
+ ],
669
+ "dtype": "float16",
670
+ "format": "f32-to-bf16",
671
+ "nbytes": 3145728,
672
+ "byteOffset": 0
673
+ },
674
+ {
675
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
676
+ "shape": [
677
+ 4096,
678
+ 512
679
+ ],
680
+ "dtype": "uint32",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 8388608,
683
+ "byteOffset": 3145728
684
+ },
685
+ {
686
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
687
+ "shape": [
688
+ 4096,
689
+ 128
690
+ ],
691
+ "dtype": "float16",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 1048576,
694
+ "byteOffset": 11534336
695
+ },
696
+ {
697
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 22016,
700
+ 128
701
+ ],
702
+ "dtype": "float16",
703
+ "format": "f32-to-bf16",
704
+ "nbytes": 5636096,
705
+ "byteOffset": 12582912
706
+ },
707
+ {
708
+ "name": "model.layers.4.mlp.down_proj.q_scale",
709
+ "shape": [
710
+ 4096,
711
+ 344
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 2818048,
716
+ "byteOffset": 18219008
717
+ },
718
+ {
719
+ "name": "model.layers.4.input_layernorm.weight",
720
+ "shape": [
721
+ 4096
722
+ ],
723
+ "dtype": "float16",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 8192,
726
+ "byteOffset": 21037056
727
+ },
728
+ {
729
+ "name": "model.layers.4.post_attention_layernorm.weight",
730
+ "shape": [
731
+ 4096
732
+ ],
733
+ "dtype": "float16",
734
+ "format": "f32-to-bf16",
735
+ "nbytes": 8192,
736
+ "byteOffset": 21045248
737
+ },
738
+ {
739
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
740
+ "shape": [
741
+ 12288,
742
+ 128
743
+ ],
744
+ "dtype": "float16",
745
+ "format": "f32-to-bf16",
746
+ "nbytes": 3145728,
747
+ "byteOffset": 21053440
748
+ },
749
+ {
750
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
751
+ "shape": [
752
+ 4096,
753
+ 512
754
+ ],
755
+ "dtype": "uint32",
756
+ "format": "f32-to-bf16",
757
+ "nbytes": 8388608,
758
+ "byteOffset": 24199168
759
+ }
760
+ ],
761
+ "md5sum": "95fee31a0e963a5282a83fbf4903b056"
762
+ },
763
+ {
764
+ "dataPath": "params_shard_20.bin",
765
+ "format": "raw-shard",
766
+ "nbytes": 45088768,
767
+ "records": [
768
+ {
769
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
770
+ "shape": [
771
+ 22016,
772
+ 512
773
+ ],
774
+ "dtype": "uint32",
775
+ "format": "f32-to-bf16",
776
+ "nbytes": 45088768,
777
+ "byteOffset": 0
778
+ }
779
+ ],
780
+ "md5sum": "20f7954fd752aa89db92760d3191eddf"
781
+ },
782
+ {
783
+ "dataPath": "params_shard_21.bin",
784
+ "format": "raw-shard",
785
+ "nbytes": 25165824,
786
+ "records": [
787
+ {
788
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
789
+ "shape": [
790
+ 12288,
791
+ 512
792
+ ],
793
+ "dtype": "uint32",
794
+ "format": "f32-to-bf16",
795
+ "nbytes": 25165824,
796
+ "byteOffset": 0
797
+ }
798
+ ],
799
+ "md5sum": "05d71f8b2fd3719876737bfba90ee4c0"
800
+ },
801
+ {
802
+ "dataPath": "params_shard_22.bin",
803
+ "format": "raw-shard",
804
+ "nbytes": 32063488,
805
+ "records": [
806
+ {
807
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
808
+ "shape": [
809
+ 4096,
810
+ 128
811
+ ],
812
+ "dtype": "float16",
813
+ "format": "f32-to-bf16",
814
+ "nbytes": 1048576,
815
+ "byteOffset": 0
816
+ },
817
+ {
818
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
819
+ "shape": [
820
+ 22016,
821
+ 128
822
+ ],
823
+ "dtype": "float16",
824
+ "format": "f32-to-bf16",
825
+ "nbytes": 5636096,
826
+ "byteOffset": 1048576
827
+ },
828
+ {
829
+ "name": "model.layers.5.mlp.down_proj.q_weight",
830
+ "shape": [
831
+ 4096,
832
+ 1376
833
+ ],
834
+ "dtype": "uint32",
835
+ "format": "f32-to-bf16",
836
+ "nbytes": 22544384,
837
+ "byteOffset": 6684672
838
+ },
839
+ {
840
+ "name": "model.layers.5.mlp.down_proj.q_scale",
841
+ "shape": [
842
+ 4096,
843
+ 344
844
+ ],
845
+ "dtype": "float16",
846
+ "format": "f32-to-bf16",
847
+ "nbytes": 2818048,
848
+ "byteOffset": 29229056
849
+ },
850
+ {
851
+ "name": "model.layers.5.input_layernorm.weight",
852
+ "shape": [
853
+ 4096
854
+ ],
855
+ "dtype": "float16",
856
+ "format": "f32-to-bf16",
857
+ "nbytes": 8192,
858
+ "byteOffset": 32047104
859
+ },
860
+ {
861
+ "name": "model.layers.5.post_attention_layernorm.weight",
862
+ "shape": [
863
+ 4096
864
+ ],
865
+ "dtype": "float16",
866
+ "format": "f32-to-bf16",
867
+ "nbytes": 8192,
868
+ "byteOffset": 32055296
869
+ }
870
+ ],
871
+ "md5sum": "1f64e37be45d052fe3d264fd700fb53c"
872
+ },
873
+ {
874
+ "dataPath": "params_shard_23.bin",
875
+ "format": "raw-shard",
876
+ "nbytes": 45088768,
877
+ "records": [
878
+ {
879
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
880
+ "shape": [
881
+ 22016,
882
+ 512
883
+ ],
884
+ "dtype": "uint32",
885
+ "format": "f32-to-bf16",
886
+ "nbytes": 45088768,
887
+ "byteOffset": 0
888
+ }
889
+ ],
890
+ "md5sum": "2ef9699b390d6e8706188197ec40c8f1"
891
+ },
892
+ {
893
+ "dataPath": "params_shard_24.bin",
894
+ "format": "raw-shard",
895
+ "nbytes": 22544384,
896
+ "records": [
897
+ {
898
+ "name": "model.layers.6.mlp.down_proj.q_weight",
899
+ "shape": [
900
+ 4096,
901
+ 1376
902
+ ],
903
+ "dtype": "uint32",
904
+ "format": "f32-to-bf16",
905
+ "nbytes": 22544384,
906
+ "byteOffset": 0
907
+ }
908
+ ],
909
+ "md5sum": "6cd3ca07a86d12f62ee11ccf2379d86d"
910
+ },
911
+ {
912
+ "dataPath": "params_shard_25.bin",
913
+ "format": "raw-shard",
914
+ "nbytes": 25165824,
915
+ "records": [
916
+ {
917
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
918
+ "shape": [
919
+ 12288,
920
+ 512
921
+ ],
922
+ "dtype": "uint32",
923
+ "format": "f32-to-bf16",
924
+ "nbytes": 25165824,
925
+ "byteOffset": 0
926
+ }
927
+ ],
928
+ "md5sum": "ae283ce0ce29814081b952312f0e181c"
929
+ },
930
+ {
931
+ "dataPath": "params_shard_26.bin",
932
+ "format": "raw-shard",
933
+ "nbytes": 32587776,
934
+ "records": [
935
+ {
936
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
937
+ "shape": [
938
+ 12288,
939
+ 128
940
+ ],
941
+ "dtype": "float16",
942
+ "format": "f32-to-bf16",
943
+ "nbytes": 3145728,
944
+ "byteOffset": 0
945
+ },
946
+ {
947
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
948
+ "shape": [
949
+ 4096,
950
+ 512
951
+ ],
952
+ "dtype": "uint32",
953
+ "format": "f32-to-bf16",
954
+ "nbytes": 8388608,
955
+ "byteOffset": 3145728
956
+ },
957
+ {
958
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
959
+ "shape": [
960
+ 4096,
961
+ 128
962
+ ],
963
+ "dtype": "float16",
964
+ "format": "f32-to-bf16",
965
+ "nbytes": 1048576,
966
+ "byteOffset": 11534336
967
+ },
968
+ {
969
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
970
+ "shape": [
971
+ 22016,
972
+ 128
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 5636096,
977
+ "byteOffset": 12582912
978
+ },
979
+ {
980
+ "name": "model.layers.6.mlp.down_proj.q_scale",
981
+ "shape": [
982
+ 4096,
983
+ 344
984
+ ],
985
+ "dtype": "float16",
986
+ "format": "f32-to-bf16",
987
+ "nbytes": 2818048,
988
+ "byteOffset": 18219008
989
+ },
990
+ {
991
+ "name": "model.layers.6.input_layernorm.weight",
992
+ "shape": [
993
+ 4096
994
+ ],
995
+ "dtype": "float16",
996
+ "format": "f32-to-bf16",
997
+ "nbytes": 8192,
998
+ "byteOffset": 21037056
999
+ },
1000
+ {
1001
+ "name": "model.layers.6.post_attention_layernorm.weight",
1002
+ "shape": [
1003
+ 4096
1004
+ ],
1005
+ "dtype": "float16",
1006
+ "format": "f32-to-bf16",
1007
+ "nbytes": 8192,
1008
+ "byteOffset": 21045248
1009
+ },
1010
+ {
1011
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
1012
+ "shape": [
1013
+ 12288,
1014
+ 128
1015
+ ],
1016
+ "dtype": "float16",
1017
+ "format": "f32-to-bf16",
1018
+ "nbytes": 3145728,
1019
+ "byteOffset": 21053440
1020
+ },
1021
+ {
1022
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
1023
+ "shape": [
1024
+ 4096,
1025
+ 512
1026
+ ],
1027
+ "dtype": "uint32",
1028
+ "format": "f32-to-bf16",
1029
+ "nbytes": 8388608,
1030
+ "byteOffset": 24199168
1031
+ }
1032
+ ],
1033
+ "md5sum": "fcba23e82341dce9cb3ad860da7d2f3f"
1034
+ },
1035
+ {
1036
+ "dataPath": "params_shard_27.bin",
1037
+ "format": "raw-shard",
1038
+ "nbytes": 45088768,
1039
+ "records": [
1040
+ {
1041
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
1042
+ "shape": [
1043
+ 22016,
1044
+ 512
1045
+ ],
1046
+ "dtype": "uint32",
1047
+ "format": "f32-to-bf16",
1048
+ "nbytes": 45088768,
1049
+ "byteOffset": 0
1050
+ }
1051
+ ],
1052
+ "md5sum": "699b46d2d1f5da456315aa38db192157"
1053
+ },
1054
+ {
1055
+ "dataPath": "params_shard_28.bin",
1056
+ "format": "raw-shard",
1057
+ "nbytes": 25165824,
1058
+ "records": [
1059
+ {
1060
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
1061
+ "shape": [
1062
+ 12288,
1063
+ 512
1064
+ ],
1065
+ "dtype": "uint32",
1066
+ "format": "f32-to-bf16",
1067
+ "nbytes": 25165824,
1068
+ "byteOffset": 0
1069
+ }
1070
+ ],
1071
+ "md5sum": "bd62335afdfb632a45cc2e6c91f1b44b"
1072
+ },
1073
+ {
1074
+ "dataPath": "params_shard_29.bin",
1075
+ "format": "raw-shard",
1076
+ "nbytes": 32063488,
1077
+ "records": [
1078
+ {
1079
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
1080
+ "shape": [
1081
+ 4096,
1082
+ 128
1083
+ ],
1084
+ "dtype": "float16",
1085
+ "format": "f32-to-bf16",
1086
+ "nbytes": 1048576,
1087
+ "byteOffset": 0
1088
+ },
1089
+ {
1090
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
1091
+ "shape": [
1092
+ 22016,
1093
+ 128
1094
+ ],
1095
+ "dtype": "float16",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 5636096,
1098
+ "byteOffset": 1048576
1099
+ },
1100
+ {
1101
+ "name": "model.layers.7.mlp.down_proj.q_weight",
1102
+ "shape": [
1103
+ 4096,
1104
+ 1376
1105
+ ],
1106
+ "dtype": "uint32",
1107
+ "format": "f32-to-bf16",
1108
+ "nbytes": 22544384,
1109
+ "byteOffset": 6684672
1110
+ },
1111
+ {
1112
+ "name": "model.layers.7.mlp.down_proj.q_scale",
1113
+ "shape": [
1114
+ 4096,
1115
+ 344
1116
+ ],
1117
+ "dtype": "float16",
1118
+ "format": "f32-to-bf16",
1119
+ "nbytes": 2818048,
1120
+ "byteOffset": 29229056
1121
+ },
1122
+ {
1123
+ "name": "model.layers.7.input_layernorm.weight",
1124
+ "shape": [
1125
+ 4096
1126
+ ],
1127
+ "dtype": "float16",
1128
+ "format": "f32-to-bf16",
1129
+ "nbytes": 8192,
1130
+ "byteOffset": 32047104
1131
+ },
1132
+ {
1133
+ "name": "model.layers.7.post_attention_layernorm.weight",
1134
+ "shape": [
1135
+ 4096
1136
+ ],
1137
+ "dtype": "float16",
1138
+ "format": "f32-to-bf16",
1139
+ "nbytes": 8192,
1140
+ "byteOffset": 32055296
1141
+ }
1142
+ ],
1143
+ "md5sum": "74c0d61cfa83c3e7082da4f40da88746"
1144
+ },
1145
+ {
1146
+ "dataPath": "params_shard_30.bin",
1147
+ "format": "raw-shard",
1148
+ "nbytes": 45088768,
1149
+ "records": [
1150
+ {
1151
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
1152
+ "shape": [
1153
+ 22016,
1154
+ 512
1155
+ ],
1156
+ "dtype": "uint32",
1157
+ "format": "f32-to-bf16",
1158
+ "nbytes": 45088768,
1159
+ "byteOffset": 0
1160
+ }
1161
+ ],
1162
+ "md5sum": "3ef2125e3d6ad0ba41b0d52d4a935cd0"
1163
+ },
1164
+ {
1165
+ "dataPath": "params_shard_31.bin",
1166
+ "format": "raw-shard",
1167
+ "nbytes": 22544384,
1168
+ "records": [
1169
+ {
1170
+ "name": "model.layers.8.mlp.down_proj.q_weight",
1171
+ "shape": [
1172
+ 4096,
1173
+ 1376
1174
+ ],
1175
+ "dtype": "uint32",
1176
+ "format": "f32-to-bf16",
1177
+ "nbytes": 22544384,
1178
+ "byteOffset": 0
1179
+ }
1180
+ ],
1181
+ "md5sum": "2e32a84420247b18d1543a2e2cbb5ca6"
1182
+ },
1183
+ {
1184
+ "dataPath": "params_shard_32.bin",
1185
+ "format": "raw-shard",
1186
+ "nbytes": 25165824,
1187
+ "records": [
1188
+ {
1189
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
1190
+ "shape": [
1191
+ 12288,
1192
+ 512
1193
+ ],
1194
+ "dtype": "uint32",
1195
+ "format": "f32-to-bf16",
1196
+ "nbytes": 25165824,
1197
+ "byteOffset": 0
1198
+ }
1199
+ ],
1200
+ "md5sum": "7219fa6ba4178ab2468e66b64eb925a6"
1201
+ },
1202
+ {
1203
+ "dataPath": "params_shard_33.bin",
1204
+ "format": "raw-shard",
1205
+ "nbytes": 32587776,
1206
+ "records": [
1207
+ {
1208
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
1209
+ "shape": [
1210
+ 12288,
1211
+ 128
1212
+ ],
1213
+ "dtype": "float16",
1214
+ "format": "f32-to-bf16",
1215
+ "nbytes": 3145728,
1216
+ "byteOffset": 0
1217
+ },
1218
+ {
1219
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
1220
+ "shape": [
1221
+ 4096,
1222
+ 512
1223
+ ],
1224
+ "dtype": "uint32",
1225
+ "format": "f32-to-bf16",
1226
+ "nbytes": 8388608,
1227
+ "byteOffset": 3145728
1228
+ },
1229
+ {
1230
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
1231
+ "shape": [
1232
+ 4096,
1233
+ 128
1234
+ ],
1235
+ "dtype": "float16",
1236
+ "format": "f32-to-bf16",
1237
+ "nbytes": 1048576,
1238
+ "byteOffset": 11534336
1239
+ },
1240
+ {
1241
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
1242
+ "shape": [
1243
+ 22016,
1244
+ 128
1245
+ ],
1246
+ "dtype": "float16",
1247
+ "format": "f32-to-bf16",
1248
+ "nbytes": 5636096,
1249
+ "byteOffset": 12582912
1250
+ },
1251
+ {
1252
+ "name": "model.layers.8.mlp.down_proj.q_scale",
1253
+ "shape": [
1254
+ 4096,
1255
+ 344
1256
+ ],
1257
+ "dtype": "float16",
1258
+ "format": "f32-to-bf16",
1259
+ "nbytes": 2818048,
1260
+ "byteOffset": 18219008
1261
+ },
1262
+ {
1263
+ "name": "model.layers.8.input_layernorm.weight",
1264
+ "shape": [
1265
+ 4096
1266
+ ],
1267
+ "dtype": "float16",
1268
+ "format": "f32-to-bf16",
1269
+ "nbytes": 8192,
1270
+ "byteOffset": 21037056
1271
+ },
1272
+ {
1273
+ "name": "model.layers.8.post_attention_layernorm.weight",
1274
+ "shape": [
1275
+ 4096
1276
+ ],
1277
+ "dtype": "float16",
1278
+ "format": "f32-to-bf16",
1279
+ "nbytes": 8192,
1280
+ "byteOffset": 21045248
1281
+ },
1282
+ {
1283
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
1284
+ "shape": [
1285
+ 12288,
1286
+ 128
1287
+ ],
1288
+ "dtype": "float16",
1289
+ "format": "f32-to-bf16",
1290
+ "nbytes": 3145728,
1291
+ "byteOffset": 21053440
1292
+ },
1293
+ {
1294
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
1295
+ "shape": [
1296
+ 4096,
1297
+ 512
1298
+ ],
1299
+ "dtype": "uint32",
1300
+ "format": "f32-to-bf16",
1301
+ "nbytes": 8388608,
1302
+ "byteOffset": 24199168
1303
+ }
1304
+ ],
1305
+ "md5sum": "4ba50eb46a4852cc6eea12850222bef7"
1306
+ },
1307
+ {
1308
+ "dataPath": "params_shard_34.bin",
1309
+ "format": "raw-shard",
1310
+ "nbytes": 45088768,
1311
+ "records": [
1312
+ {
1313
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
1314
+ "shape": [
1315
+ 22016,
1316
+ 512
1317
+ ],
1318
+ "dtype": "uint32",
1319
+ "format": "f32-to-bf16",
1320
+ "nbytes": 45088768,
1321
+ "byteOffset": 0
1322
+ }
1323
+ ],
1324
+ "md5sum": "e8a62f76bd551abcd10c3e3e125b7c5b"
1325
+ },
1326
+ {
1327
+ "dataPath": "params_shard_35.bin",
1328
+ "format": "raw-shard",
1329
+ "nbytes": 25165824,
1330
+ "records": [
1331
+ {
1332
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
1333
+ "shape": [
1334
+ 12288,
1335
+ 512
1336
+ ],
1337
+ "dtype": "uint32",
1338
+ "format": "f32-to-bf16",
1339
+ "nbytes": 25165824,
1340
+ "byteOffset": 0
1341
+ }
1342
+ ],
1343
+ "md5sum": "2302523927b4273eeb18c56c1976f8cc"
1344
+ },
1345
+ {
1346
+ "dataPath": "params_shard_36.bin",
1347
+ "format": "raw-shard",
1348
+ "nbytes": 32063488,
1349
+ "records": [
1350
+ {
1351
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
1352
+ "shape": [
1353
+ 4096,
1354
+ 128
1355
+ ],
1356
+ "dtype": "float16",
1357
+ "format": "f32-to-bf16",
1358
+ "nbytes": 1048576,
1359
+ "byteOffset": 0
1360
+ },
1361
+ {
1362
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
1363
+ "shape": [
1364
+ 22016,
1365
+ 128
1366
+ ],
1367
+ "dtype": "float16",
1368
+ "format": "f32-to-bf16",
1369
+ "nbytes": 5636096,
1370
+ "byteOffset": 1048576
1371
+ },
1372
+ {
1373
+ "name": "model.layers.9.mlp.down_proj.q_weight",
1374
+ "shape": [
1375
+ 4096,
1376
+ 1376
1377
+ ],
1378
+ "dtype": "uint32",
1379
+ "format": "f32-to-bf16",
1380
+ "nbytes": 22544384,
1381
+ "byteOffset": 6684672
1382
+ },
1383
+ {
1384
+ "name": "model.layers.9.mlp.down_proj.q_scale",
1385
+ "shape": [
1386
+ 4096,
1387
+ 344
1388
+ ],
1389
+ "dtype": "float16",
1390
+ "format": "f32-to-bf16",
1391
+ "nbytes": 2818048,
1392
+ "byteOffset": 29229056
1393
+ },
1394
+ {
1395
+ "name": "model.layers.9.input_layernorm.weight",
1396
+ "shape": [
1397
+ 4096
1398
+ ],
1399
+ "dtype": "float16",
1400
+ "format": "f32-to-bf16",
1401
+ "nbytes": 8192,
1402
+ "byteOffset": 32047104
1403
+ },
1404
+ {
1405
+ "name": "model.layers.9.post_attention_layernorm.weight",
1406
+ "shape": [
1407
+ 4096
1408
+ ],
1409
+ "dtype": "float16",
1410
+ "format": "f32-to-bf16",
1411
+ "nbytes": 8192,
1412
+ "byteOffset": 32055296
1413
+ }
1414
+ ],
1415
+ "md5sum": "873054a18b28147ccbc0bb8ec58c2201"
1416
+ },
1417
+ {
1418
+ "dataPath": "params_shard_37.bin",
1419
+ "format": "raw-shard",
1420
+ "nbytes": 45088768,
1421
+ "records": [
1422
+ {
1423
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
1424
+ "shape": [
1425
+ 22016,
1426
+ 512
1427
+ ],
1428
+ "dtype": "uint32",
1429
+ "format": "f32-to-bf16",
1430
+ "nbytes": 45088768,
1431
+ "byteOffset": 0
1432
+ }
1433
+ ],
1434
+ "md5sum": "4e7af0f5b05a0f9c399088231cd684ee"
1435
+ },
1436
+ {
1437
+ "dataPath": "params_shard_38.bin",
1438
+ "format": "raw-shard",
1439
+ "nbytes": 22544384,
1440
+ "records": [
1441
+ {
1442
+ "name": "model.layers.10.mlp.down_proj.q_weight",
1443
+ "shape": [
1444
+ 4096,
1445
+ 1376
1446
+ ],
1447
+ "dtype": "uint32",
1448
+ "format": "f32-to-bf16",
1449
+ "nbytes": 22544384,
1450
+ "byteOffset": 0
1451
+ }
1452
+ ],
1453
+ "md5sum": "5993975a2b6771cbcda9d79ee7f31245"
1454
+ },
1455
+ {
1456
+ "dataPath": "params_shard_39.bin",
1457
+ "format": "raw-shard",
1458
+ "nbytes": 25165824,
1459
+ "records": [
1460
+ {
1461
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
1462
+ "shape": [
1463
+ 12288,
1464
+ 512
1465
+ ],
1466
+ "dtype": "uint32",
1467
+ "format": "f32-to-bf16",
1468
+ "nbytes": 25165824,
1469
+ "byteOffset": 0
1470
+ }
1471
+ ],
1472
+ "md5sum": "1951fdb99a782b3dbc318d4f15bd63fa"
1473
+ },
1474
+ {
1475
+ "dataPath": "params_shard_40.bin",
1476
+ "format": "raw-shard",
1477
+ "nbytes": 32587776,
1478
+ "records": [
1479
+ {
1480
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
1481
+ "shape": [
1482
+ 12288,
1483
+ 128
1484
+ ],
1485
+ "dtype": "float16",
1486
+ "format": "f32-to-bf16",
1487
+ "nbytes": 3145728,
1488
+ "byteOffset": 0
1489
+ },
1490
+ {
1491
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
1492
+ "shape": [
1493
+ 4096,
1494
+ 512
1495
+ ],
1496
+ "dtype": "uint32",
1497
+ "format": "f32-to-bf16",
1498
+ "nbytes": 8388608,
1499
+ "byteOffset": 3145728
1500
+ },
1501
+ {
1502
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
1503
+ "shape": [
1504
+ 4096,
1505
+ 128
1506
+ ],
1507
+ "dtype": "float16",
1508
+ "format": "f32-to-bf16",
1509
+ "nbytes": 1048576,
1510
+ "byteOffset": 11534336
1511
+ },
1512
+ {
1513
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
1514
+ "shape": [
1515
+ 22016,
1516
+ 128
1517
+ ],
1518
+ "dtype": "float16",
1519
+ "format": "f32-to-bf16",
1520
+ "nbytes": 5636096,
1521
+ "byteOffset": 12582912
1522
+ },
1523
+ {
1524
+ "name": "model.layers.10.mlp.down_proj.q_scale",
1525
+ "shape": [
1526
+ 4096,
1527
+ 344
1528
+ ],
1529
+ "dtype": "float16",
1530
+ "format": "f32-to-bf16",
1531
+ "nbytes": 2818048,
1532
+ "byteOffset": 18219008
1533
+ },
1534
+ {
1535
+ "name": "model.layers.10.input_layernorm.weight",
1536
+ "shape": [
1537
+ 4096
1538
+ ],
1539
+ "dtype": "float16",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 8192,
1542
+ "byteOffset": 21037056
1543
+ },
1544
+ {
1545
+ "name": "model.layers.10.post_attention_layernorm.weight",
1546
+ "shape": [
1547
+ 4096
1548
+ ],
1549
+ "dtype": "float16",
1550
+ "format": "f32-to-bf16",
1551
+ "nbytes": 8192,
1552
+ "byteOffset": 21045248
1553
+ },
1554
+ {
1555
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
1556
+ "shape": [
1557
+ 12288,
1558
+ 128
1559
+ ],
1560
+ "dtype": "float16",
1561
+ "format": "f32-to-bf16",
1562
+ "nbytes": 3145728,
1563
+ "byteOffset": 21053440
1564
+ },
1565
+ {
1566
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
1567
+ "shape": [
1568
+ 4096,
1569
+ 512
1570
+ ],
1571
+ "dtype": "uint32",
1572
+ "format": "f32-to-bf16",
1573
+ "nbytes": 8388608,
1574
+ "byteOffset": 24199168
1575
+ }
1576
+ ],
1577
+ "md5sum": "fd6c4c9bbb84ef263171a5614daa76cc"
1578
+ },
1579
+ {
1580
+ "dataPath": "params_shard_41.bin",
1581
+ "format": "raw-shard",
1582
+ "nbytes": 45088768,
1583
+ "records": [
1584
+ {
1585
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
1586
+ "shape": [
1587
+ 22016,
1588
+ 512
1589
+ ],
1590
+ "dtype": "uint32",
1591
+ "format": "f32-to-bf16",
1592
+ "nbytes": 45088768,
1593
+ "byteOffset": 0
1594
+ }
1595
+ ],
1596
+ "md5sum": "2cd319ee56313a3ea29c7bfc4dfa2ad3"
1597
+ },
1598
+ {
1599
+ "dataPath": "params_shard_42.bin",
1600
+ "format": "raw-shard",
1601
+ "nbytes": 25165824,
1602
+ "records": [
1603
+ {
1604
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
1605
+ "shape": [
1606
+ 12288,
1607
+ 512
1608
+ ],
1609
+ "dtype": "uint32",
1610
+ "format": "f32-to-bf16",
1611
+ "nbytes": 25165824,
1612
+ "byteOffset": 0
1613
+ }
1614
+ ],
1615
+ "md5sum": "9fdb9ea9168460346d1deafa5115a282"
1616
+ },
1617
+ {
1618
+ "dataPath": "params_shard_43.bin",
1619
+ "format": "raw-shard",
1620
+ "nbytes": 32063488,
1621
+ "records": [
1622
+ {
1623
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
1624
+ "shape": [
1625
+ 4096,
1626
+ 128
1627
+ ],
1628
+ "dtype": "float16",
1629
+ "format": "f32-to-bf16",
1630
+ "nbytes": 1048576,
1631
+ "byteOffset": 0
1632
+ },
1633
+ {
1634
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
1635
+ "shape": [
1636
+ 22016,
1637
+ 128
1638
+ ],
1639
+ "dtype": "float16",
1640
+ "format": "f32-to-bf16",
1641
+ "nbytes": 5636096,
1642
+ "byteOffset": 1048576
1643
+ },
1644
+ {
1645
+ "name": "model.layers.11.mlp.down_proj.q_weight",
1646
+ "shape": [
1647
+ 4096,
1648
+ 1376
1649
+ ],
1650
+ "dtype": "uint32",
1651
+ "format": "f32-to-bf16",
1652
+ "nbytes": 22544384,
1653
+ "byteOffset": 6684672
1654
+ },
1655
+ {
1656
+ "name": "model.layers.11.mlp.down_proj.q_scale",
1657
+ "shape": [
1658
+ 4096,
1659
+ 344
1660
+ ],
1661
+ "dtype": "float16",
1662
+ "format": "f32-to-bf16",
1663
+ "nbytes": 2818048,
1664
+ "byteOffset": 29229056
1665
+ },
1666
+ {
1667
+ "name": "model.layers.11.input_layernorm.weight",
1668
+ "shape": [
1669
+ 4096
1670
+ ],
1671
+ "dtype": "float16",
1672
+ "format": "f32-to-bf16",
1673
+ "nbytes": 8192,
1674
+ "byteOffset": 32047104
1675
+ },
1676
+ {
1677
+ "name": "model.layers.11.post_attention_layernorm.weight",
1678
+ "shape": [
1679
+ 4096
1680
+ ],
1681
+ "dtype": "float16",
1682
+ "format": "f32-to-bf16",
1683
+ "nbytes": 8192,
1684
+ "byteOffset": 32055296
1685
+ }
1686
+ ],
1687
+ "md5sum": "4af071255a901797d8e8803f2771596d"
1688
+ },
1689
+ {
1690
+ "dataPath": "params_shard_44.bin",
1691
+ "format": "raw-shard",
1692
+ "nbytes": 45088768,
1693
+ "records": [
1694
+ {
1695
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
1696
+ "shape": [
1697
+ 22016,
1698
+ 512
1699
+ ],
1700
+ "dtype": "uint32",
1701
+ "format": "f32-to-bf16",
1702
+ "nbytes": 45088768,
1703
+ "byteOffset": 0
1704
+ }
1705
+ ],
1706
+ "md5sum": "1e23819b13809c063c314faee8f945ac"
1707
+ },
1708
+ {
1709
+ "dataPath": "params_shard_45.bin",
1710
+ "format": "raw-shard",
1711
+ "nbytes": 22544384,
1712
+ "records": [
1713
+ {
1714
+ "name": "model.layers.12.mlp.down_proj.q_weight",
1715
+ "shape": [
1716
+ 4096,
1717
+ 1376
1718
+ ],
1719
+ "dtype": "uint32",
1720
+ "format": "f32-to-bf16",
1721
+ "nbytes": 22544384,
1722
+ "byteOffset": 0
1723
+ }
1724
+ ],
1725
+ "md5sum": "6807ab8932573414e3fd161fe9ab91c7"
1726
+ },
1727
+ {
1728
+ "dataPath": "params_shard_46.bin",
1729
+ "format": "raw-shard",
1730
+ "nbytes": 25165824,
1731
+ "records": [
1732
+ {
1733
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
1734
+ "shape": [
1735
+ 12288,
1736
+ 512
1737
+ ],
1738
+ "dtype": "uint32",
1739
+ "format": "f32-to-bf16",
1740
+ "nbytes": 25165824,
1741
+ "byteOffset": 0
1742
+ }
1743
+ ],
1744
+ "md5sum": "31ef0402d240b00fc38c442fb07d3c9c"
1745
+ },
1746
+ {
1747
+ "dataPath": "params_shard_47.bin",
1748
+ "format": "raw-shard",
1749
+ "nbytes": 32587776,
1750
+ "records": [
1751
+ {
1752
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
1753
+ "shape": [
1754
+ 12288,
1755
+ 128
1756
+ ],
1757
+ "dtype": "float16",
1758
+ "format": "f32-to-bf16",
1759
+ "nbytes": 3145728,
1760
+ "byteOffset": 0
1761
+ },
1762
+ {
1763
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
1764
+ "shape": [
1765
+ 4096,
1766
+ 512
1767
+ ],
1768
+ "dtype": "uint32",
1769
+ "format": "f32-to-bf16",
1770
+ "nbytes": 8388608,
1771
+ "byteOffset": 3145728
1772
+ },
1773
+ {
1774
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
1775
+ "shape": [
1776
+ 4096,
1777
+ 128
1778
+ ],
1779
+ "dtype": "float16",
1780
+ "format": "f32-to-bf16",
1781
+ "nbytes": 1048576,
1782
+ "byteOffset": 11534336
1783
+ },
1784
+ {
1785
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
1786
+ "shape": [
1787
+ 22016,
1788
+ 128
1789
+ ],
1790
+ "dtype": "float16",
1791
+ "format": "f32-to-bf16",
1792
+ "nbytes": 5636096,
1793
+ "byteOffset": 12582912
1794
+ },
1795
+ {
1796
+ "name": "model.layers.12.mlp.down_proj.q_scale",
1797
+ "shape": [
1798
+ 4096,
1799
+ 344
1800
+ ],
1801
+ "dtype": "float16",
1802
+ "format": "f32-to-bf16",
1803
+ "nbytes": 2818048,
1804
+ "byteOffset": 18219008
1805
+ },
1806
+ {
1807
+ "name": "model.layers.12.input_layernorm.weight",
1808
+ "shape": [
1809
+ 4096
1810
+ ],
1811
+ "dtype": "float16",
1812
+ "format": "f32-to-bf16",
1813
+ "nbytes": 8192,
1814
+ "byteOffset": 21037056
1815
+ },
1816
+ {
1817
+ "name": "model.layers.12.post_attention_layernorm.weight",
1818
+ "shape": [
1819
+ 4096
1820
+ ],
1821
+ "dtype": "float16",
1822
+ "format": "f32-to-bf16",
1823
+ "nbytes": 8192,
1824
+ "byteOffset": 21045248
1825
+ },
1826
+ {
1827
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
1828
+ "shape": [
1829
+ 12288,
1830
+ 128
1831
+ ],
1832
+ "dtype": "float16",
1833
+ "format": "f32-to-bf16",
1834
+ "nbytes": 3145728,
1835
+ "byteOffset": 21053440
1836
+ },
1837
+ {
1838
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
1839
+ "shape": [
1840
+ 4096,
1841
+ 512
1842
+ ],
1843
+ "dtype": "uint32",
1844
+ "format": "f32-to-bf16",
1845
+ "nbytes": 8388608,
1846
+ "byteOffset": 24199168
1847
+ }
1848
+ ],
1849
+ "md5sum": "c0eef17b98c90131d5b0628775bfbffa"
1850
+ },
1851
+ {
1852
+ "dataPath": "params_shard_48.bin",
1853
+ "format": "raw-shard",
1854
+ "nbytes": 45088768,
1855
+ "records": [
1856
+ {
1857
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
1858
+ "shape": [
1859
+ 22016,
1860
+ 512
1861
+ ],
1862
+ "dtype": "uint32",
1863
+ "format": "f32-to-bf16",
1864
+ "nbytes": 45088768,
1865
+ "byteOffset": 0
1866
+ }
1867
+ ],
1868
+ "md5sum": "7233db81dba12add6ddd1ea9dfe9c3dd"
1869
+ },
1870
+ {
1871
+ "dataPath": "params_shard_49.bin",
1872
+ "format": "raw-shard",
1873
+ "nbytes": 25165824,
1874
+ "records": [
1875
+ {
1876
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
1877
+ "shape": [
1878
+ 12288,
1879
+ 512
1880
+ ],
1881
+ "dtype": "uint32",
1882
+ "format": "f32-to-bf16",
1883
+ "nbytes": 25165824,
1884
+ "byteOffset": 0
1885
+ }
1886
+ ],
1887
+ "md5sum": "02a22884484fa383e5cafd9c7dfea8f5"
1888
+ },
1889
+ {
1890
+ "dataPath": "params_shard_50.bin",
1891
+ "format": "raw-shard",
1892
+ "nbytes": 32063488,
1893
+ "records": [
1894
+ {
1895
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
1896
+ "shape": [
1897
+ 4096,
1898
+ 128
1899
+ ],
1900
+ "dtype": "float16",
1901
+ "format": "f32-to-bf16",
1902
+ "nbytes": 1048576,
1903
+ "byteOffset": 0
1904
+ },
1905
+ {
1906
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
1907
+ "shape": [
1908
+ 22016,
1909
+ 128
1910
+ ],
1911
+ "dtype": "float16",
1912
+ "format": "f32-to-bf16",
1913
+ "nbytes": 5636096,
1914
+ "byteOffset": 1048576
1915
+ },
1916
+ {
1917
+ "name": "model.layers.13.mlp.down_proj.q_weight",
1918
+ "shape": [
1919
+ 4096,
1920
+ 1376
1921
+ ],
1922
+ "dtype": "uint32",
1923
+ "format": "f32-to-bf16",
1924
+ "nbytes": 22544384,
1925
+ "byteOffset": 6684672
1926
+ },
1927
+ {
1928
+ "name": "model.layers.13.mlp.down_proj.q_scale",
1929
+ "shape": [
1930
+ 4096,
1931
+ 344
1932
+ ],
1933
+ "dtype": "float16",
1934
+ "format": "f32-to-bf16",
1935
+ "nbytes": 2818048,
1936
+ "byteOffset": 29229056
1937
+ },
1938
+ {
1939
+ "name": "model.layers.13.input_layernorm.weight",
1940
+ "shape": [
1941
+ 4096
1942
+ ],
1943
+ "dtype": "float16",
1944
+ "format": "f32-to-bf16",
1945
+ "nbytes": 8192,
1946
+ "byteOffset": 32047104
1947
+ },
1948
+ {
1949
+ "name": "model.layers.13.post_attention_layernorm.weight",
1950
+ "shape": [
1951
+ 4096
1952
+ ],
1953
+ "dtype": "float16",
1954
+ "format": "f32-to-bf16",
1955
+ "nbytes": 8192,
1956
+ "byteOffset": 32055296
1957
+ }
1958
+ ],
1959
+ "md5sum": "7e0e19e0c2512dff82f2a8c442a16ec8"
1960
+ },
1961
+ {
1962
+ "dataPath": "params_shard_51.bin",
1963
+ "format": "raw-shard",
1964
+ "nbytes": 45088768,
1965
+ "records": [
1966
+ {
1967
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
1968
+ "shape": [
1969
+ 22016,
1970
+ 512
1971
+ ],
1972
+ "dtype": "uint32",
1973
+ "format": "f32-to-bf16",
1974
+ "nbytes": 45088768,
1975
+ "byteOffset": 0
1976
+ }
1977
+ ],
1978
+ "md5sum": "1a5169035ab977aaf0e031f1e80dbf06"
1979
+ },
1980
+ {
1981
+ "dataPath": "params_shard_52.bin",
1982
+ "format": "raw-shard",
1983
+ "nbytes": 22544384,
1984
+ "records": [
1985
+ {
1986
+ "name": "model.layers.14.mlp.down_proj.q_weight",
1987
+ "shape": [
1988
+ 4096,
1989
+ 1376
1990
+ ],
1991
+ "dtype": "uint32",
1992
+ "format": "f32-to-bf16",
1993
+ "nbytes": 22544384,
1994
+ "byteOffset": 0
1995
+ }
1996
+ ],
1997
+ "md5sum": "a523a61ca293169dad379555f513f77b"
1998
+ },
1999
+ {
2000
+ "dataPath": "params_shard_53.bin",
2001
+ "format": "raw-shard",
2002
+ "nbytes": 25165824,
2003
+ "records": [
2004
+ {
2005
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
2006
+ "shape": [
2007
+ 12288,
2008
+ 512
2009
+ ],
2010
+ "dtype": "uint32",
2011
+ "format": "f32-to-bf16",
2012
+ "nbytes": 25165824,
2013
+ "byteOffset": 0
2014
+ }
2015
+ ],
2016
+ "md5sum": "7f47a4a5ba0c5b1ffc169d4a0210d6e6"
2017
+ },
2018
+ {
2019
+ "dataPath": "params_shard_54.bin",
2020
+ "format": "raw-shard",
2021
+ "nbytes": 32587776,
2022
+ "records": [
2023
+ {
2024
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
2025
+ "shape": [
2026
+ 12288,
2027
+ 128
2028
+ ],
2029
+ "dtype": "float16",
2030
+ "format": "f32-to-bf16",
2031
+ "nbytes": 3145728,
2032
+ "byteOffset": 0
2033
+ },
2034
+ {
2035
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
2036
+ "shape": [
2037
+ 4096,
2038
+ 512
2039
+ ],
2040
+ "dtype": "uint32",
2041
+ "format": "f32-to-bf16",
2042
+ "nbytes": 8388608,
2043
+ "byteOffset": 3145728
2044
+ },
2045
+ {
2046
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
2047
+ "shape": [
2048
+ 4096,
2049
+ 128
2050
+ ],
2051
+ "dtype": "float16",
2052
+ "format": "f32-to-bf16",
2053
+ "nbytes": 1048576,
2054
+ "byteOffset": 11534336
2055
+ },
2056
+ {
2057
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
2058
+ "shape": [
2059
+ 22016,
2060
+ 128
2061
+ ],
2062
+ "dtype": "float16",
2063
+ "format": "f32-to-bf16",
2064
+ "nbytes": 5636096,
2065
+ "byteOffset": 12582912
2066
+ },
2067
+ {
2068
+ "name": "model.layers.14.mlp.down_proj.q_scale",
2069
+ "shape": [
2070
+ 4096,
2071
+ 344
2072
+ ],
2073
+ "dtype": "float16",
2074
+ "format": "f32-to-bf16",
2075
+ "nbytes": 2818048,
2076
+ "byteOffset": 18219008
2077
+ },
2078
+ {
2079
+ "name": "model.layers.14.input_layernorm.weight",
2080
+ "shape": [
2081
+ 4096
2082
+ ],
2083
+ "dtype": "float16",
2084
+ "format": "f32-to-bf16",
2085
+ "nbytes": 8192,
2086
+ "byteOffset": 21037056
2087
+ },
2088
+ {
2089
+ "name": "model.layers.14.post_attention_layernorm.weight",
2090
+ "shape": [
2091
+ 4096
2092
+ ],
2093
+ "dtype": "float16",
2094
+ "format": "f32-to-bf16",
2095
+ "nbytes": 8192,
2096
+ "byteOffset": 21045248
2097
+ },
2098
+ {
2099
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
2100
+ "shape": [
2101
+ 12288,
2102
+ 128
2103
+ ],
2104
+ "dtype": "float16",
2105
+ "format": "f32-to-bf16",
2106
+ "nbytes": 3145728,
2107
+ "byteOffset": 21053440
2108
+ },
2109
+ {
2110
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
2111
+ "shape": [
2112
+ 4096,
2113
+ 512
2114
+ ],
2115
+ "dtype": "uint32",
2116
+ "format": "f32-to-bf16",
2117
+ "nbytes": 8388608,
2118
+ "byteOffset": 24199168
2119
+ }
2120
+ ],
2121
+ "md5sum": "e5046b82ae71b7ee0b3c13cd8ad87d59"
2122
+ },
2123
+ {
2124
+ "dataPath": "params_shard_55.bin",
2125
+ "format": "raw-shard",
2126
+ "nbytes": 45088768,
2127
+ "records": [
2128
+ {
2129
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
2130
+ "shape": [
2131
+ 22016,
2132
+ 512
2133
+ ],
2134
+ "dtype": "uint32",
2135
+ "format": "f32-to-bf16",
2136
+ "nbytes": 45088768,
2137
+ "byteOffset": 0
2138
+ }
2139
+ ],
2140
+ "md5sum": "858f179d9f31284e20a148ea0cd8c754"
2141
+ },
2142
+ {
2143
+ "dataPath": "params_shard_56.bin",
2144
+ "format": "raw-shard",
2145
+ "nbytes": 25165824,
2146
+ "records": [
2147
+ {
2148
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
2149
+ "shape": [
2150
+ 12288,
2151
+ 512
2152
+ ],
2153
+ "dtype": "uint32",
2154
+ "format": "f32-to-bf16",
2155
+ "nbytes": 25165824,
2156
+ "byteOffset": 0
2157
+ }
2158
+ ],
2159
+ "md5sum": "ed50be593ce20d306d147e344640a81c"
2160
+ },
2161
+ {
2162
+ "dataPath": "params_shard_57.bin",
2163
+ "format": "raw-shard",
2164
+ "nbytes": 32063488,
2165
+ "records": [
2166
+ {
2167
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
2168
+ "shape": [
2169
+ 4096,
2170
+ 128
2171
+ ],
2172
+ "dtype": "float16",
2173
+ "format": "f32-to-bf16",
2174
+ "nbytes": 1048576,
2175
+ "byteOffset": 0
2176
+ },
2177
+ {
2178
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
2179
+ "shape": [
2180
+ 22016,
2181
+ 128
2182
+ ],
2183
+ "dtype": "float16",
2184
+ "format": "f32-to-bf16",
2185
+ "nbytes": 5636096,
2186
+ "byteOffset": 1048576
2187
+ },
2188
+ {
2189
+ "name": "model.layers.15.mlp.down_proj.q_weight",
2190
+ "shape": [
2191
+ 4096,
2192
+ 1376
2193
+ ],
2194
+ "dtype": "uint32",
2195
+ "format": "f32-to-bf16",
2196
+ "nbytes": 22544384,
2197
+ "byteOffset": 6684672
2198
+ },
2199
+ {
2200
+ "name": "model.layers.15.mlp.down_proj.q_scale",
2201
+ "shape": [
2202
+ 4096,
2203
+ 344
2204
+ ],
2205
+ "dtype": "float16",
2206
+ "format": "f32-to-bf16",
2207
+ "nbytes": 2818048,
2208
+ "byteOffset": 29229056
2209
+ },
2210
+ {
2211
+ "name": "model.layers.15.input_layernorm.weight",
2212
+ "shape": [
2213
+ 4096
2214
+ ],
2215
+ "dtype": "float16",
2216
+ "format": "f32-to-bf16",
2217
+ "nbytes": 8192,
2218
+ "byteOffset": 32047104
2219
+ },
2220
+ {
2221
+ "name": "model.layers.15.post_attention_layernorm.weight",
2222
+ "shape": [
2223
+ 4096
2224
+ ],
2225
+ "dtype": "float16",
2226
+ "format": "f32-to-bf16",
2227
+ "nbytes": 8192,
2228
+ "byteOffset": 32055296
2229
+ }
2230
+ ],
2231
+ "md5sum": "12e022e015e09ec4c17cb20bd7c68b16"
2232
+ },
2233
+ {
2234
+ "dataPath": "params_shard_58.bin",
2235
+ "format": "raw-shard",
2236
+ "nbytes": 45088768,
2237
+ "records": [
2238
+ {
2239
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
2240
+ "shape": [
2241
+ 22016,
2242
+ 512
2243
+ ],
2244
+ "dtype": "uint32",
2245
+ "format": "f32-to-bf16",
2246
+ "nbytes": 45088768,
2247
+ "byteOffset": 0
2248
+ }
2249
+ ],
2250
+ "md5sum": "de337359b182b6121fdc9801a084428e"
2251
+ },
2252
+ {
2253
+ "dataPath": "params_shard_59.bin",
2254
+ "format": "raw-shard",
2255
+ "nbytes": 22544384,
2256
+ "records": [
2257
+ {
2258
+ "name": "model.layers.16.mlp.down_proj.q_weight",
2259
+ "shape": [
2260
+ 4096,
2261
+ 1376
2262
+ ],
2263
+ "dtype": "uint32",
2264
+ "format": "f32-to-bf16",
2265
+ "nbytes": 22544384,
2266
+ "byteOffset": 0
2267
+ }
2268
+ ],
2269
+ "md5sum": "a6d679ed3c77a771938b7162f0bb5dd5"
2270
+ },
2271
+ {
2272
+ "dataPath": "params_shard_60.bin",
2273
+ "format": "raw-shard",
2274
+ "nbytes": 65536000,
2275
+ "records": [
2276
+ {
2277
+ "name": "lm_head.q_weight",
2278
+ "shape": [
2279
+ 32000,
2280
+ 512
2281
+ ],
2282
+ "dtype": "uint32",
2283
+ "format": "f32-to-bf16",
2284
+ "nbytes": 65536000,
2285
+ "byteOffset": 0
2286
+ }
2287
+ ],
2288
+ "md5sum": "024e3785e7a04546cbb7709a95ac89b1"
2289
+ },
2290
+ {
2291
+ "dataPath": "params_shard_61.bin",
2292
+ "format": "raw-shard",
2293
+ "nbytes": 29253632,
2294
+ "records": [
2295
+ {
2296
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
2297
+ "shape": [
2298
+ 12288,
2299
+ 128
2300
+ ],
2301
+ "dtype": "float16",
2302
+ "format": "f32-to-bf16",
2303
+ "nbytes": 3145728,
2304
+ "byteOffset": 0
2305
+ },
2306
+ {
2307
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
2308
+ "shape": [
2309
+ 4096,
2310
+ 512
2311
+ ],
2312
+ "dtype": "uint32",
2313
+ "format": "f32-to-bf16",
2314
+ "nbytes": 8388608,
2315
+ "byteOffset": 3145728
2316
+ },
2317
+ {
2318
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
2319
+ "shape": [
2320
+ 4096,
2321
+ 128
2322
+ ],
2323
+ "dtype": "float16",
2324
+ "format": "f32-to-bf16",
2325
+ "nbytes": 1048576,
2326
+ "byteOffset": 11534336
2327
+ },
2328
+ {
2329
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
2330
+ "shape": [
2331
+ 22016,
2332
+ 128
2333
+ ],
2334
+ "dtype": "float16",
2335
+ "format": "f32-to-bf16",
2336
+ "nbytes": 5636096,
2337
+ "byteOffset": 12582912
2338
+ },
2339
+ {
2340
+ "name": "model.layers.16.mlp.down_proj.q_scale",
2341
+ "shape": [
2342
+ 4096,
2343
+ 344
2344
+ ],
2345
+ "dtype": "float16",
2346
+ "format": "f32-to-bf16",
2347
+ "nbytes": 2818048,
2348
+ "byteOffset": 18219008
2349
+ },
2350
+ {
2351
+ "name": "model.layers.16.input_layernorm.weight",
2352
+ "shape": [
2353
+ 4096
2354
+ ],
2355
+ "dtype": "float16",
2356
+ "format": "f32-to-bf16",
2357
+ "nbytes": 8192,
2358
+ "byteOffset": 21037056
2359
+ },
2360
+ {
2361
+ "name": "model.layers.16.post_attention_layernorm.weight",
2362
+ "shape": [
2363
+ 4096
2364
+ ],
2365
+ "dtype": "float16",
2366
+ "format": "f32-to-bf16",
2367
+ "nbytes": 8192,
2368
+ "byteOffset": 21045248
2369
+ },
2370
+ {
2371
+ "name": "model.norm.weight",
2372
+ "shape": [
2373
+ 4096
2374
+ ],
2375
+ "dtype": "float16",
2376
+ "format": "f32-to-bf16",
2377
+ "nbytes": 8192,
2378
+ "byteOffset": 21053440
2379
+ },
2380
+ {
2381
+ "name": "lm_head.q_scale",
2382
+ "shape": [
2383
+ 32000,
2384
+ 128
2385
+ ],
2386
+ "dtype": "float16",
2387
+ "format": "f32-to-bf16",
2388
+ "nbytes": 8192000,
2389
+ "byteOffset": 21061632
2390
+ }
2391
+ ],
2392
+ "md5sum": "e81c1bc7f90ff777240bee6be0140d49"
2393
+ }
2394
+ ]
2395
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c349a2f31f2994dd70b2150b743bccdfeb916a0e85a58bb4028cda8eca343a
3
+ size 65536000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232c7f283dd7addec9c68db29fcbdef5e2395a72848ca9d36cf7fa3e91571bf6
3
+ size 33357824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fba9ed6fc447c115212a3c65d8a4e5d923028d9a00c0d58d0b984b86fb1d78
3
+ size 22544384
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611e189fe3725b237e2f51e91271c052d4adad12b81ed333197d0ad425161c1b
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05150375ea424ff366bf21d1f9c19cccce1dc0a712732852d5f39b4d23720d61
3
+ size 32587776
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e82f022911caf0a166b471f0a189635d9b9d423ebca73e41c17dd4a2675d078
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14d4ef09ba61b07db001f78339d264ef9cf3ccd17715ae4ecd4c9eac89e6fe18
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7355c0a99143a510813497404b1077f643427beafb5873952dd7a3edf122ccea
3
+ size 32063488
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3110dc095a91d9c5d7732cc8318607cb6d2d0c2048fe5f819cc446de0423a829
3
+ size 45088768
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a8ccec02d8102c7b36a92bbeb1249abab8b832fd868f1a9de21261147ab524
3
+ size 22544384
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f02bd87afce6a52c6607f1598d3dc8708d3319f952f5e2eb59287dd0645125f3
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6156d8c154a59d6513f68c35618b1e97991e67d4e4ef09871affb9e907598d05
3
+ size 32587776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84481b731b6fc8d0311e6ed02226c5d6d06041cac4cb6031f893966336e35fef
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:892399a4da47733cfa97bfef0288ea6ee6215116040150cb44897c06d4abcd96
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b9c00929c876f71e199bf57bbe6f344d2d1fe4b50ae8f33360e6275203fcc6d
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec7c0f814b12dddb8aa4d63efbbf46630712ec776154da1f7911af56620f5a22
3
+ size 32063488
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4ba67ddc5589ae47faf134e95c672dfeb93a0280c651c69fd392cb38d0ccc2
3
+ size 45088768
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ea6efcb2bfcc33109e5fe15239b627d2ffa070b48cab5b3ad3fd81e1ea4093
3
+ size 22544384
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835f3fca438745efffdc44201833352fb64b77ec6852b9424a74c72be56f7d12
3
+ size 25165824
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a67e986f7c8c429bc1fee8b4e47faf846dfd67756f33bd456c617ad37e9a211c
3
+ size 32587776
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ab699c8abc1ca1f7e6edb6b5ed1b5628eab00d55112ff29d7ce238303f38d3
3
+ size 45088768
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e7389377095a4306492451ca98b1708916e2c5750c9fa7901e651d1affe842
3
+ size 25165824
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:807f81212ac7677891b9b4b1e5ee4cf3b95badc931299073c59772c20bebfc01
3
+ size 32063488
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84bf0af25cb493e072a627b2fef3bcd45b38f5a1fec1a4aedb81d4bf01bc5558
3
+ size 22544384
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1156799677b09b1d5a702b9e8fc9da0eaa6d111f870c46560f24ed420968948
3
+ size 45088768
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:663052f5c09fa024aca8b319d0c90b3bcc94b444f3684e41c76751f24a3c69b7
3
+ size 22544384
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dbbff4dfa7f706ff1f9ea0c2b80803849b7acd6efb6a35551973a014aa50db4
3
+ size 25165824
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0faf2b7a11ffa0c567d49aa41818c020c169b2c975fd25641814e0becddc9591
3
+ size 32587776
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49694160abcb370686f63fc25f0035e69c502eaaeb62a32706b6dd1013526b34
3
+ size 45088768
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c87f1f38bfdc6f10cca5f94bd5123431c9f99b8a5b21e9c170a4306e79a7fd20
3
+ size 25165824
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c52a7340fb688f920537938b02f343446307fef1635eba9f6463850351432c23
3
+ size 32063488
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a9da35485a0e730b694cd20709cc308d0b18e9b4bfc5d94fc37854f5c9abc35
3
+ size 45088768
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a186f1cad6448451a835131292de64e0bb7dd4b16096289e2bc2799877c6884e
3
+ size 22544384
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e726483f7a72b379a8b49c395a8ffadf257eabfcbe5d3846c27a20e02d5f42bc
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42d68728888eaaef9f650ea8ef0a07d389e0e6b8e2a46edbd18e2424db4d4cf8
3
+ size 25165824
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0bbcfba211e6ac42cb4b6bed068fad3a463574c47bc12c483f98dc0b79577cf
3
+ size 32587776
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51297bfd00a5dfcc7cf4b027df2a1f743cad8defa87ed86682b8a9bbc0c8fc4c
3
+ size 45088768
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323592fd493c05fc1ed6f3c66bf64cde021e896dc35ec6918e573d4115fbfad8
3
+ size 25165824
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cfb8866339247b8ad5a227eb98892a7ab42a3b79048a7615ff86cec90c4b5a6
3
+ size 32063488
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbe9ca2a7a058ae9f6c75d0dba7fbbf2d47567f6839c5e8048a9b17c4eade2b
3
+ size 45088768
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16acebe162cce992660e0f3ebbb23633ee9d8a8c9bb7345f4a370ca0102f085f
3
+ size 22544384
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0492626b7a04b7f5f70df9ec02726e6fbbde15b2121ab70be50a08f3dcd9ef
3
+ size 25165824
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f02a016ae867514a854053ee2a9f8433d83df4d6ccf25569decb4de23fadff
3
+ size 32587776
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e2cf37f5c3d04df4767844e43555b91d005a267c37fa7ff12120ccce8d6d30
3
+ size 45088768
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:192b1156be98311732c5cb4ae58aa87784c129591c0eedef7d92c08d2d352bc3
3
+ size 25165824
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eeeda57f2ff7cad4b1f033e2ebc2324acdc60e7d019bb5f1cff77eb07a98766
3
+ size 32587776
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b8da4e82e7d005421b7f7e7cd6e5712b1ef72c7306bfbea44b2a881c96e57e
3
+ size 32063488
params_shard_51.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:269d3dfe9f2a6b7b75f276dd1a822e17523a8e4d14dc4234293e0c85e5bad1e6
3
+ size 45088768