llm-slayer committed on
Commit
d7f10a8
1 Parent(s): b9e38a8
This view is limited to 50 files because it contains too many changes.   See raw diff
mlc-chat-config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q0f16",
4
+ "model_config": {
5
+ "hidden_size": 2048,
6
+ "intermediate_size": 5504,
7
+ "num_attention_heads": 16,
8
+ "num_hidden_layers": 24,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 32002,
11
+ "position_embedding_base": 10000.0,
12
+ "context_window_size": 2048,
13
+ "prefill_chunk_size": 2048,
14
+ "num_key_value_heads": 16,
15
+ "head_dim": 128,
16
+ "tensor_parallel_shards": 1,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 32002,
20
+ "context_window_size": 2048,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 2048,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "mean_gen_len": 128,
26
+ "max_gen_len": 512,
27
+ "shift_fill_factor": 0.3,
28
+ "temperature": 0.3,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.05,
32
+ "top_p": 0.9,
33
+ "conv_template": {
34
+ "name": "croissantllm",
35
+ "system_template": "<s>",
36
+ "system_message": "",
37
+ "add_role_after_system_message": true,
38
+ "roles": {
39
+ "user": "<|im_start|>user",
40
+ "assistant": "<|im_start|>assistant"
41
+ },
42
+ "role_templates": {
43
+ "user": "{user_message}",
44
+ "assistant": "{assistant_message}",
45
+ "tool": "{tool_message}"
46
+ },
47
+ "messages": [],
48
+ "seps": [
49
+ "<|im_end|>\n"
50
+ ],
51
+ "role_content_sep": "\n",
52
+ "role_empty_sep": "\n",
53
+ "stop_str": [
54
+ "<|im_end|>",
55
+ "</s>"
56
+ ],
57
+ "stop_token_ids": [
58
+ 32000,
59
+ 2
60
+ ],
61
+ "function_string": "",
62
+ "use_function_calling": false
63
+ },
64
+ "pad_token_id": 2,
65
+ "bos_token_id": 1,
66
+ "eos_token_id": 2,
67
+ "tokenizer_files": [
68
+ "tokenizer.json",
69
+ "tokenizer_config.json"
70
+ ],
71
+ "version": "0.1.0"
72
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 147,
4
+ "ParamBytes": 2690863104.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 131080192,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 32002,
17
+ 2048
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 131080192,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "5236df4d04f62453ab07342ace9ac63d"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 25165824,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
34
+ "shape": [
35
+ 6144,
36
+ 2048
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 25165824,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "5bbb8806c5c52f5b8fa1204aab8174ec"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 45088768,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 11008,
55
+ 2048
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 45088768,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "9a9ced9b15e7fe96449ab9879f39a7de"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 25165824,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
72
+ "shape": [
73
+ 6144,
74
+ 2048
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 25165824,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "c2fade5225fb488d6334c56610491342"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 30941184,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.0.self_attn.o_proj.weight",
91
+ "shape": [
92
+ 2048,
93
+ 2048
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 8388608,
98
+ "byteOffset": 0
99
+ },
100
+ {
101
+ "name": "model.layers.0.mlp.down_proj.weight",
102
+ "shape": [
103
+ 2048,
104
+ 5504
105
+ ],
106
+ "dtype": "float16",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 22544384,
109
+ "byteOffset": 8388608
110
+ },
111
+ {
112
+ "name": "model.layers.0.input_layernorm.weight",
113
+ "shape": [
114
+ 2048
115
+ ],
116
+ "dtype": "float16",
117
+ "format": "f32-to-bf16",
118
+ "nbytes": 4096,
119
+ "byteOffset": 30932992
120
+ },
121
+ {
122
+ "name": "model.layers.0.post_attention_layernorm.weight",
123
+ "shape": [
124
+ 2048
125
+ ],
126
+ "dtype": "float16",
127
+ "format": "f32-to-bf16",
128
+ "nbytes": 4096,
129
+ "byteOffset": 30937088
130
+ }
131
+ ],
132
+ "md5sum": "749e32c97aad4742b9e90caf3cf3a341"
133
+ },
134
+ {
135
+ "dataPath": "params_shard_5.bin",
136
+ "format": "raw-shard",
137
+ "nbytes": 45088768,
138
+ "records": [
139
+ {
140
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
141
+ "shape": [
142
+ 11008,
143
+ 2048
144
+ ],
145
+ "dtype": "float16",
146
+ "format": "f32-to-bf16",
147
+ "nbytes": 45088768,
148
+ "byteOffset": 0
149
+ }
150
+ ],
151
+ "md5sum": "9ce0bf4542e4fe97ee580425d199ee8f"
152
+ },
153
+ {
154
+ "dataPath": "params_shard_6.bin",
155
+ "format": "raw-shard",
156
+ "nbytes": 25165824,
157
+ "records": [
158
+ {
159
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
160
+ "shape": [
161
+ 6144,
162
+ 2048
163
+ ],
164
+ "dtype": "float16",
165
+ "format": "f32-to-bf16",
166
+ "nbytes": 25165824,
167
+ "byteOffset": 0
168
+ }
169
+ ],
170
+ "md5sum": "e5095112881d0804c94a8b084a6a9733"
171
+ },
172
+ {
173
+ "dataPath": "params_shard_7.bin",
174
+ "format": "raw-shard",
175
+ "nbytes": 30941184,
176
+ "records": [
177
+ {
178
+ "name": "model.layers.1.self_attn.o_proj.weight",
179
+ "shape": [
180
+ 2048,
181
+ 2048
182
+ ],
183
+ "dtype": "float16",
184
+ "format": "f32-to-bf16",
185
+ "nbytes": 8388608,
186
+ "byteOffset": 0
187
+ },
188
+ {
189
+ "name": "model.layers.1.mlp.down_proj.weight",
190
+ "shape": [
191
+ 2048,
192
+ 5504
193
+ ],
194
+ "dtype": "float16",
195
+ "format": "f32-to-bf16",
196
+ "nbytes": 22544384,
197
+ "byteOffset": 8388608
198
+ },
199
+ {
200
+ "name": "model.layers.1.input_layernorm.weight",
201
+ "shape": [
202
+ 2048
203
+ ],
204
+ "dtype": "float16",
205
+ "format": "f32-to-bf16",
206
+ "nbytes": 4096,
207
+ "byteOffset": 30932992
208
+ },
209
+ {
210
+ "name": "model.layers.1.post_attention_layernorm.weight",
211
+ "shape": [
212
+ 2048
213
+ ],
214
+ "dtype": "float16",
215
+ "format": "f32-to-bf16",
216
+ "nbytes": 4096,
217
+ "byteOffset": 30937088
218
+ }
219
+ ],
220
+ "md5sum": "231a5cd144ccb30ae318314b23608227"
221
+ },
222
+ {
223
+ "dataPath": "params_shard_8.bin",
224
+ "format": "raw-shard",
225
+ "nbytes": 45088768,
226
+ "records": [
227
+ {
228
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
229
+ "shape": [
230
+ 11008,
231
+ 2048
232
+ ],
233
+ "dtype": "float16",
234
+ "format": "f32-to-bf16",
235
+ "nbytes": 45088768,
236
+ "byteOffset": 0
237
+ }
238
+ ],
239
+ "md5sum": "7d42e4c15eeff80f77e52df4ceb2bf43"
240
+ },
241
+ {
242
+ "dataPath": "params_shard_9.bin",
243
+ "format": "raw-shard",
244
+ "nbytes": 25165824,
245
+ "records": [
246
+ {
247
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
248
+ "shape": [
249
+ 6144,
250
+ 2048
251
+ ],
252
+ "dtype": "float16",
253
+ "format": "f32-to-bf16",
254
+ "nbytes": 25165824,
255
+ "byteOffset": 0
256
+ }
257
+ ],
258
+ "md5sum": "0abaa24f053f587e9edc818c28011802"
259
+ },
260
+ {
261
+ "dataPath": "params_shard_10.bin",
262
+ "format": "raw-shard",
263
+ "nbytes": 30941184,
264
+ "records": [
265
+ {
266
+ "name": "model.layers.2.self_attn.o_proj.weight",
267
+ "shape": [
268
+ 2048,
269
+ 2048
270
+ ],
271
+ "dtype": "float16",
272
+ "format": "f32-to-bf16",
273
+ "nbytes": 8388608,
274
+ "byteOffset": 0
275
+ },
276
+ {
277
+ "name": "model.layers.2.mlp.down_proj.weight",
278
+ "shape": [
279
+ 2048,
280
+ 5504
281
+ ],
282
+ "dtype": "float16",
283
+ "format": "f32-to-bf16",
284
+ "nbytes": 22544384,
285
+ "byteOffset": 8388608
286
+ },
287
+ {
288
+ "name": "model.layers.2.input_layernorm.weight",
289
+ "shape": [
290
+ 2048
291
+ ],
292
+ "dtype": "float16",
293
+ "format": "f32-to-bf16",
294
+ "nbytes": 4096,
295
+ "byteOffset": 30932992
296
+ },
297
+ {
298
+ "name": "model.layers.2.post_attention_layernorm.weight",
299
+ "shape": [
300
+ 2048
301
+ ],
302
+ "dtype": "float16",
303
+ "format": "f32-to-bf16",
304
+ "nbytes": 4096,
305
+ "byteOffset": 30937088
306
+ }
307
+ ],
308
+ "md5sum": "f949015f98f9bba9f0df2bf776613f3b"
309
+ },
310
+ {
311
+ "dataPath": "params_shard_11.bin",
312
+ "format": "raw-shard",
313
+ "nbytes": 45088768,
314
+ "records": [
315
+ {
316
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
317
+ "shape": [
318
+ 11008,
319
+ 2048
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 45088768,
324
+ "byteOffset": 0
325
+ }
326
+ ],
327
+ "md5sum": "ddc0f21869ee787e61240e00b7a60d1e"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_12.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 25165824,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
336
+ "shape": [
337
+ 6144,
338
+ 2048
339
+ ],
340
+ "dtype": "float16",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 25165824,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "1efb1abf6f59a5797f8be4262df36e78"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_13.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 30941184,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.3.self_attn.o_proj.weight",
355
+ "shape": [
356
+ 2048,
357
+ 2048
358
+ ],
359
+ "dtype": "float16",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 8388608,
362
+ "byteOffset": 0
363
+ },
364
+ {
365
+ "name": "model.layers.3.mlp.down_proj.weight",
366
+ "shape": [
367
+ 2048,
368
+ 5504
369
+ ],
370
+ "dtype": "float16",
371
+ "format": "f32-to-bf16",
372
+ "nbytes": 22544384,
373
+ "byteOffset": 8388608
374
+ },
375
+ {
376
+ "name": "model.layers.3.input_layernorm.weight",
377
+ "shape": [
378
+ 2048
379
+ ],
380
+ "dtype": "float16",
381
+ "format": "f32-to-bf16",
382
+ "nbytes": 4096,
383
+ "byteOffset": 30932992
384
+ },
385
+ {
386
+ "name": "model.layers.3.post_attention_layernorm.weight",
387
+ "shape": [
388
+ 2048
389
+ ],
390
+ "dtype": "float16",
391
+ "format": "f32-to-bf16",
392
+ "nbytes": 4096,
393
+ "byteOffset": 30937088
394
+ }
395
+ ],
396
+ "md5sum": "0416f555eb82990a5b4750c73f3971b4"
397
+ },
398
+ {
399
+ "dataPath": "params_shard_14.bin",
400
+ "format": "raw-shard",
401
+ "nbytes": 45088768,
402
+ "records": [
403
+ {
404
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
405
+ "shape": [
406
+ 11008,
407
+ 2048
408
+ ],
409
+ "dtype": "float16",
410
+ "format": "f32-to-bf16",
411
+ "nbytes": 45088768,
412
+ "byteOffset": 0
413
+ }
414
+ ],
415
+ "md5sum": "fde70d3fbf23e3964a59160f25bef5c3"
416
+ },
417
+ {
418
+ "dataPath": "params_shard_15.bin",
419
+ "format": "raw-shard",
420
+ "nbytes": 25165824,
421
+ "records": [
422
+ {
423
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
424
+ "shape": [
425
+ 6144,
426
+ 2048
427
+ ],
428
+ "dtype": "float16",
429
+ "format": "f32-to-bf16",
430
+ "nbytes": 25165824,
431
+ "byteOffset": 0
432
+ }
433
+ ],
434
+ "md5sum": "a88d63a90234d431fa022cde58cd6558"
435
+ },
436
+ {
437
+ "dataPath": "params_shard_16.bin",
438
+ "format": "raw-shard",
439
+ "nbytes": 30941184,
440
+ "records": [
441
+ {
442
+ "name": "model.layers.4.self_attn.o_proj.weight",
443
+ "shape": [
444
+ 2048,
445
+ 2048
446
+ ],
447
+ "dtype": "float16",
448
+ "format": "f32-to-bf16",
449
+ "nbytes": 8388608,
450
+ "byteOffset": 0
451
+ },
452
+ {
453
+ "name": "model.layers.4.mlp.down_proj.weight",
454
+ "shape": [
455
+ 2048,
456
+ 5504
457
+ ],
458
+ "dtype": "float16",
459
+ "format": "f32-to-bf16",
460
+ "nbytes": 22544384,
461
+ "byteOffset": 8388608
462
+ },
463
+ {
464
+ "name": "model.layers.4.input_layernorm.weight",
465
+ "shape": [
466
+ 2048
467
+ ],
468
+ "dtype": "float16",
469
+ "format": "f32-to-bf16",
470
+ "nbytes": 4096,
471
+ "byteOffset": 30932992
472
+ },
473
+ {
474
+ "name": "model.layers.4.post_attention_layernorm.weight",
475
+ "shape": [
476
+ 2048
477
+ ],
478
+ "dtype": "float16",
479
+ "format": "f32-to-bf16",
480
+ "nbytes": 4096,
481
+ "byteOffset": 30937088
482
+ }
483
+ ],
484
+ "md5sum": "849bc602707e649cb03a3a507adf21f8"
485
+ },
486
+ {
487
+ "dataPath": "params_shard_17.bin",
488
+ "format": "raw-shard",
489
+ "nbytes": 45088768,
490
+ "records": [
491
+ {
492
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
493
+ "shape": [
494
+ 11008,
495
+ 2048
496
+ ],
497
+ "dtype": "float16",
498
+ "format": "f32-to-bf16",
499
+ "nbytes": 45088768,
500
+ "byteOffset": 0
501
+ }
502
+ ],
503
+ "md5sum": "615449ac000262083d5eb3f9b4009385"
504
+ },
505
+ {
506
+ "dataPath": "params_shard_18.bin",
507
+ "format": "raw-shard",
508
+ "nbytes": 25165824,
509
+ "records": [
510
+ {
511
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
512
+ "shape": [
513
+ 6144,
514
+ 2048
515
+ ],
516
+ "dtype": "float16",
517
+ "format": "f32-to-bf16",
518
+ "nbytes": 25165824,
519
+ "byteOffset": 0
520
+ }
521
+ ],
522
+ "md5sum": "37acdfc860edc57092f7bcafd7f6a477"
523
+ },
524
+ {
525
+ "dataPath": "params_shard_19.bin",
526
+ "format": "raw-shard",
527
+ "nbytes": 30941184,
528
+ "records": [
529
+ {
530
+ "name": "model.layers.5.self_attn.o_proj.weight",
531
+ "shape": [
532
+ 2048,
533
+ 2048
534
+ ],
535
+ "dtype": "float16",
536
+ "format": "f32-to-bf16",
537
+ "nbytes": 8388608,
538
+ "byteOffset": 0
539
+ },
540
+ {
541
+ "name": "model.layers.5.mlp.down_proj.weight",
542
+ "shape": [
543
+ 2048,
544
+ 5504
545
+ ],
546
+ "dtype": "float16",
547
+ "format": "f32-to-bf16",
548
+ "nbytes": 22544384,
549
+ "byteOffset": 8388608
550
+ },
551
+ {
552
+ "name": "model.layers.5.input_layernorm.weight",
553
+ "shape": [
554
+ 2048
555
+ ],
556
+ "dtype": "float16",
557
+ "format": "f32-to-bf16",
558
+ "nbytes": 4096,
559
+ "byteOffset": 30932992
560
+ },
561
+ {
562
+ "name": "model.layers.5.post_attention_layernorm.weight",
563
+ "shape": [
564
+ 2048
565
+ ],
566
+ "dtype": "float16",
567
+ "format": "f32-to-bf16",
568
+ "nbytes": 4096,
569
+ "byteOffset": 30937088
570
+ }
571
+ ],
572
+ "md5sum": "5c9e8abdddbdc09af7cfa13aed17a71c"
573
+ },
574
+ {
575
+ "dataPath": "params_shard_20.bin",
576
+ "format": "raw-shard",
577
+ "nbytes": 45088768,
578
+ "records": [
579
+ {
580
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
581
+ "shape": [
582
+ 11008,
583
+ 2048
584
+ ],
585
+ "dtype": "float16",
586
+ "format": "f32-to-bf16",
587
+ "nbytes": 45088768,
588
+ "byteOffset": 0
589
+ }
590
+ ],
591
+ "md5sum": "44711bbbdd19bfc01307c09d06e42203"
592
+ },
593
+ {
594
+ "dataPath": "params_shard_21.bin",
595
+ "format": "raw-shard",
596
+ "nbytes": 25165824,
597
+ "records": [
598
+ {
599
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
600
+ "shape": [
601
+ 6144,
602
+ 2048
603
+ ],
604
+ "dtype": "float16",
605
+ "format": "f32-to-bf16",
606
+ "nbytes": 25165824,
607
+ "byteOffset": 0
608
+ }
609
+ ],
610
+ "md5sum": "e6d2b5932ccd2183c429b292acd04d59"
611
+ },
612
+ {
613
+ "dataPath": "params_shard_22.bin",
614
+ "format": "raw-shard",
615
+ "nbytes": 30941184,
616
+ "records": [
617
+ {
618
+ "name": "model.layers.6.self_attn.o_proj.weight",
619
+ "shape": [
620
+ 2048,
621
+ 2048
622
+ ],
623
+ "dtype": "float16",
624
+ "format": "f32-to-bf16",
625
+ "nbytes": 8388608,
626
+ "byteOffset": 0
627
+ },
628
+ {
629
+ "name": "model.layers.6.mlp.down_proj.weight",
630
+ "shape": [
631
+ 2048,
632
+ 5504
633
+ ],
634
+ "dtype": "float16",
635
+ "format": "f32-to-bf16",
636
+ "nbytes": 22544384,
637
+ "byteOffset": 8388608
638
+ },
639
+ {
640
+ "name": "model.layers.6.input_layernorm.weight",
641
+ "shape": [
642
+ 2048
643
+ ],
644
+ "dtype": "float16",
645
+ "format": "f32-to-bf16",
646
+ "nbytes": 4096,
647
+ "byteOffset": 30932992
648
+ },
649
+ {
650
+ "name": "model.layers.6.post_attention_layernorm.weight",
651
+ "shape": [
652
+ 2048
653
+ ],
654
+ "dtype": "float16",
655
+ "format": "f32-to-bf16",
656
+ "nbytes": 4096,
657
+ "byteOffset": 30937088
658
+ }
659
+ ],
660
+ "md5sum": "25ca45812d1c73be8d1d63539f0e2f10"
661
+ },
662
+ {
663
+ "dataPath": "params_shard_23.bin",
664
+ "format": "raw-shard",
665
+ "nbytes": 45088768,
666
+ "records": [
667
+ {
668
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
669
+ "shape": [
670
+ 11008,
671
+ 2048
672
+ ],
673
+ "dtype": "float16",
674
+ "format": "f32-to-bf16",
675
+ "nbytes": 45088768,
676
+ "byteOffset": 0
677
+ }
678
+ ],
679
+ "md5sum": "c4b9f9c8868f00e930cfb9aedb21f97c"
680
+ },
681
+ {
682
+ "dataPath": "params_shard_24.bin",
683
+ "format": "raw-shard",
684
+ "nbytes": 25165824,
685
+ "records": [
686
+ {
687
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
688
+ "shape": [
689
+ 6144,
690
+ 2048
691
+ ],
692
+ "dtype": "float16",
693
+ "format": "f32-to-bf16",
694
+ "nbytes": 25165824,
695
+ "byteOffset": 0
696
+ }
697
+ ],
698
+ "md5sum": "0f04b837378155f443ae22913cb03317"
699
+ },
700
+ {
701
+ "dataPath": "params_shard_25.bin",
702
+ "format": "raw-shard",
703
+ "nbytes": 30941184,
704
+ "records": [
705
+ {
706
+ "name": "model.layers.7.self_attn.o_proj.weight",
707
+ "shape": [
708
+ 2048,
709
+ 2048
710
+ ],
711
+ "dtype": "float16",
712
+ "format": "f32-to-bf16",
713
+ "nbytes": 8388608,
714
+ "byteOffset": 0
715
+ },
716
+ {
717
+ "name": "model.layers.7.mlp.down_proj.weight",
718
+ "shape": [
719
+ 2048,
720
+ 5504
721
+ ],
722
+ "dtype": "float16",
723
+ "format": "f32-to-bf16",
724
+ "nbytes": 22544384,
725
+ "byteOffset": 8388608
726
+ },
727
+ {
728
+ "name": "model.layers.7.input_layernorm.weight",
729
+ "shape": [
730
+ 2048
731
+ ],
732
+ "dtype": "float16",
733
+ "format": "f32-to-bf16",
734
+ "nbytes": 4096,
735
+ "byteOffset": 30932992
736
+ },
737
+ {
738
+ "name": "model.layers.7.post_attention_layernorm.weight",
739
+ "shape": [
740
+ 2048
741
+ ],
742
+ "dtype": "float16",
743
+ "format": "f32-to-bf16",
744
+ "nbytes": 4096,
745
+ "byteOffset": 30937088
746
+ }
747
+ ],
748
+ "md5sum": "0dd1c4f443e8f56523af21af40a04f0c"
749
+ },
750
+ {
751
+ "dataPath": "params_shard_26.bin",
752
+ "format": "raw-shard",
753
+ "nbytes": 45088768,
754
+ "records": [
755
+ {
756
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
757
+ "shape": [
758
+ 11008,
759
+ 2048
760
+ ],
761
+ "dtype": "float16",
762
+ "format": "f32-to-bf16",
763
+ "nbytes": 45088768,
764
+ "byteOffset": 0
765
+ }
766
+ ],
767
+ "md5sum": "42c4a6bf934639f00ca80ed51847dcd1"
768
+ },
769
+ {
770
+ "dataPath": "params_shard_27.bin",
771
+ "format": "raw-shard",
772
+ "nbytes": 25165824,
773
+ "records": [
774
+ {
775
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
776
+ "shape": [
777
+ 6144,
778
+ 2048
779
+ ],
780
+ "dtype": "float16",
781
+ "format": "f32-to-bf16",
782
+ "nbytes": 25165824,
783
+ "byteOffset": 0
784
+ }
785
+ ],
786
+ "md5sum": "90505af0ffe93522225df2e56bc0aab6"
787
+ },
788
+ {
789
+ "dataPath": "params_shard_28.bin",
790
+ "format": "raw-shard",
791
+ "nbytes": 30941184,
792
+ "records": [
793
+ {
794
+ "name": "model.layers.8.self_attn.o_proj.weight",
795
+ "shape": [
796
+ 2048,
797
+ 2048
798
+ ],
799
+ "dtype": "float16",
800
+ "format": "f32-to-bf16",
801
+ "nbytes": 8388608,
802
+ "byteOffset": 0
803
+ },
804
+ {
805
+ "name": "model.layers.8.mlp.down_proj.weight",
806
+ "shape": [
807
+ 2048,
808
+ 5504
809
+ ],
810
+ "dtype": "float16",
811
+ "format": "f32-to-bf16",
812
+ "nbytes": 22544384,
813
+ "byteOffset": 8388608
814
+ },
815
+ {
816
+ "name": "model.layers.8.input_layernorm.weight",
817
+ "shape": [
818
+ 2048
819
+ ],
820
+ "dtype": "float16",
821
+ "format": "f32-to-bf16",
822
+ "nbytes": 4096,
823
+ "byteOffset": 30932992
824
+ },
825
+ {
826
+ "name": "model.layers.8.post_attention_layernorm.weight",
827
+ "shape": [
828
+ 2048
829
+ ],
830
+ "dtype": "float16",
831
+ "format": "f32-to-bf16",
832
+ "nbytes": 4096,
833
+ "byteOffset": 30937088
834
+ }
835
+ ],
836
+ "md5sum": "45d54535e2451ae43f699ed32ecd6ce2"
837
+ },
838
+ {
839
+ "dataPath": "params_shard_29.bin",
840
+ "format": "raw-shard",
841
+ "nbytes": 45088768,
842
+ "records": [
843
+ {
844
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
845
+ "shape": [
846
+ 11008,
847
+ 2048
848
+ ],
849
+ "dtype": "float16",
850
+ "format": "f32-to-bf16",
851
+ "nbytes": 45088768,
852
+ "byteOffset": 0
853
+ }
854
+ ],
855
+ "md5sum": "b80d76fadef6c9b41b4bdf827732e61c"
856
+ },
857
+ {
858
+ "dataPath": "params_shard_30.bin",
859
+ "format": "raw-shard",
860
+ "nbytes": 25165824,
861
+ "records": [
862
+ {
863
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
864
+ "shape": [
865
+ 6144,
866
+ 2048
867
+ ],
868
+ "dtype": "float16",
869
+ "format": "f32-to-bf16",
870
+ "nbytes": 25165824,
871
+ "byteOffset": 0
872
+ }
873
+ ],
874
+ "md5sum": "c9137c3f240c15f682707d7fa6ac3ddf"
875
+ },
876
+ {
877
+ "dataPath": "params_shard_31.bin",
878
+ "format": "raw-shard",
879
+ "nbytes": 30941184,
880
+ "records": [
881
+ {
882
+ "name": "model.layers.9.self_attn.o_proj.weight",
883
+ "shape": [
884
+ 2048,
885
+ 2048
886
+ ],
887
+ "dtype": "float16",
888
+ "format": "f32-to-bf16",
889
+ "nbytes": 8388608,
890
+ "byteOffset": 0
891
+ },
892
+ {
893
+ "name": "model.layers.9.mlp.down_proj.weight",
894
+ "shape": [
895
+ 2048,
896
+ 5504
897
+ ],
898
+ "dtype": "float16",
899
+ "format": "f32-to-bf16",
900
+ "nbytes": 22544384,
901
+ "byteOffset": 8388608
902
+ },
903
+ {
904
+ "name": "model.layers.9.input_layernorm.weight",
905
+ "shape": [
906
+ 2048
907
+ ],
908
+ "dtype": "float16",
909
+ "format": "f32-to-bf16",
910
+ "nbytes": 4096,
911
+ "byteOffset": 30932992
912
+ },
913
+ {
914
+ "name": "model.layers.9.post_attention_layernorm.weight",
915
+ "shape": [
916
+ 2048
917
+ ],
918
+ "dtype": "float16",
919
+ "format": "f32-to-bf16",
920
+ "nbytes": 4096,
921
+ "byteOffset": 30937088
922
+ }
923
+ ],
924
+ "md5sum": "6a3b1c5c34e386b6bcb9a3ad29668d09"
925
+ },
926
+ {
927
+ "dataPath": "params_shard_32.bin",
928
+ "format": "raw-shard",
929
+ "nbytes": 45088768,
930
+ "records": [
931
+ {
932
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
933
+ "shape": [
934
+ 11008,
935
+ 2048
936
+ ],
937
+ "dtype": "float16",
938
+ "format": "f32-to-bf16",
939
+ "nbytes": 45088768,
940
+ "byteOffset": 0
941
+ }
942
+ ],
943
+ "md5sum": "c390e99990a0126f98d61f27ab2c68a4"
944
+ },
945
+ {
946
+ "dataPath": "params_shard_33.bin",
947
+ "format": "raw-shard",
948
+ "nbytes": 25165824,
949
+ "records": [
950
+ {
951
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
952
+ "shape": [
953
+ 6144,
954
+ 2048
955
+ ],
956
+ "dtype": "float16",
957
+ "format": "f32-to-bf16",
958
+ "nbytes": 25165824,
959
+ "byteOffset": 0
960
+ }
961
+ ],
962
+ "md5sum": "612efdbcbe7541a4244a76d6c1c8f484"
963
+ },
964
+ {
965
+ "dataPath": "params_shard_34.bin",
966
+ "format": "raw-shard",
967
+ "nbytes": 30941184,
968
+ "records": [
969
+ {
970
+ "name": "model.layers.10.self_attn.o_proj.weight",
971
+ "shape": [
972
+ 2048,
973
+ 2048
974
+ ],
975
+ "dtype": "float16",
976
+ "format": "f32-to-bf16",
977
+ "nbytes": 8388608,
978
+ "byteOffset": 0
979
+ },
980
+ {
981
+ "name": "model.layers.10.mlp.down_proj.weight",
982
+ "shape": [
983
+ 2048,
984
+ 5504
985
+ ],
986
+ "dtype": "float16",
987
+ "format": "f32-to-bf16",
988
+ "nbytes": 22544384,
989
+ "byteOffset": 8388608
990
+ },
991
+ {
992
+ "name": "model.layers.10.input_layernorm.weight",
993
+ "shape": [
994
+ 2048
995
+ ],
996
+ "dtype": "float16",
997
+ "format": "f32-to-bf16",
998
+ "nbytes": 4096,
999
+ "byteOffset": 30932992
1000
+ },
1001
+ {
1002
+ "name": "model.layers.10.post_attention_layernorm.weight",
1003
+ "shape": [
1004
+ 2048
1005
+ ],
1006
+ "dtype": "float16",
1007
+ "format": "f32-to-bf16",
1008
+ "nbytes": 4096,
1009
+ "byteOffset": 30937088
1010
+ }
1011
+ ],
1012
+ "md5sum": "30fac35a87a1e5628baac643050bc09f"
1013
+ },
1014
+ {
1015
+ "dataPath": "params_shard_35.bin",
1016
+ "format": "raw-shard",
1017
+ "nbytes": 45088768,
1018
+ "records": [
1019
+ {
1020
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
1021
+ "shape": [
1022
+ 11008,
1023
+ 2048
1024
+ ],
1025
+ "dtype": "float16",
1026
+ "format": "f32-to-bf16",
1027
+ "nbytes": 45088768,
1028
+ "byteOffset": 0
1029
+ }
1030
+ ],
1031
+ "md5sum": "b99ee6242e460e82dd53143c91c45f7b"
1032
+ },
1033
+ {
1034
+ "dataPath": "params_shard_36.bin",
1035
+ "format": "raw-shard",
1036
+ "nbytes": 25165824,
1037
+ "records": [
1038
+ {
1039
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
1040
+ "shape": [
1041
+ 6144,
1042
+ 2048
1043
+ ],
1044
+ "dtype": "float16",
1045
+ "format": "f32-to-bf16",
1046
+ "nbytes": 25165824,
1047
+ "byteOffset": 0
1048
+ }
1049
+ ],
1050
+ "md5sum": "91f6d3eb98038ec2a068a915f3f14973"
1051
+ },
1052
+ {
1053
+ "dataPath": "params_shard_37.bin",
1054
+ "format": "raw-shard",
1055
+ "nbytes": 30941184,
1056
+ "records": [
1057
+ {
1058
+ "name": "model.layers.11.self_attn.o_proj.weight",
1059
+ "shape": [
1060
+ 2048,
1061
+ 2048
1062
+ ],
1063
+ "dtype": "float16",
1064
+ "format": "f32-to-bf16",
1065
+ "nbytes": 8388608,
1066
+ "byteOffset": 0
1067
+ },
1068
+ {
1069
+ "name": "model.layers.11.mlp.down_proj.weight",
1070
+ "shape": [
1071
+ 2048,
1072
+ 5504
1073
+ ],
1074
+ "dtype": "float16",
1075
+ "format": "f32-to-bf16",
1076
+ "nbytes": 22544384,
1077
+ "byteOffset": 8388608
1078
+ },
1079
+ {
1080
+ "name": "model.layers.11.input_layernorm.weight",
1081
+ "shape": [
1082
+ 2048
1083
+ ],
1084
+ "dtype": "float16",
1085
+ "format": "f32-to-bf16",
1086
+ "nbytes": 4096,
1087
+ "byteOffset": 30932992
1088
+ },
1089
+ {
1090
+ "name": "model.layers.11.post_attention_layernorm.weight",
1091
+ "shape": [
1092
+ 2048
1093
+ ],
1094
+ "dtype": "float16",
1095
+ "format": "f32-to-bf16",
1096
+ "nbytes": 4096,
1097
+ "byteOffset": 30937088
1098
+ }
1099
+ ],
1100
+ "md5sum": "ff414be19f0b4d6522885e7729f1b203"
1101
+ },
1102
+ {
1103
+ "dataPath": "params_shard_38.bin",
1104
+ "format": "raw-shard",
1105
+ "nbytes": 45088768,
1106
+ "records": [
1107
+ {
1108
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
1109
+ "shape": [
1110
+ 11008,
1111
+ 2048
1112
+ ],
1113
+ "dtype": "float16",
1114
+ "format": "f32-to-bf16",
1115
+ "nbytes": 45088768,
1116
+ "byteOffset": 0
1117
+ }
1118
+ ],
1119
+ "md5sum": "a17cba4e8a82a54ebb7bfa0f67fa7f91"
1120
+ },
1121
+ {
1122
+ "dataPath": "params_shard_39.bin",
1123
+ "format": "raw-shard",
1124
+ "nbytes": 25165824,
1125
+ "records": [
1126
+ {
1127
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
1128
+ "shape": [
1129
+ 6144,
1130
+ 2048
1131
+ ],
1132
+ "dtype": "float16",
1133
+ "format": "f32-to-bf16",
1134
+ "nbytes": 25165824,
1135
+ "byteOffset": 0
1136
+ }
1137
+ ],
1138
+ "md5sum": "db19e04c8ffdbc2794a128855c5527e2"
1139
+ },
1140
+ {
1141
+ "dataPath": "params_shard_40.bin",
1142
+ "format": "raw-shard",
1143
+ "nbytes": 30941184,
1144
+ "records": [
1145
+ {
1146
+ "name": "model.layers.12.self_attn.o_proj.weight",
1147
+ "shape": [
1148
+ 2048,
1149
+ 2048
1150
+ ],
1151
+ "dtype": "float16",
1152
+ "format": "f32-to-bf16",
1153
+ "nbytes": 8388608,
1154
+ "byteOffset": 0
1155
+ },
1156
+ {
1157
+ "name": "model.layers.12.mlp.down_proj.weight",
1158
+ "shape": [
1159
+ 2048,
1160
+ 5504
1161
+ ],
1162
+ "dtype": "float16",
1163
+ "format": "f32-to-bf16",
1164
+ "nbytes": 22544384,
1165
+ "byteOffset": 8388608
1166
+ },
1167
+ {
1168
+ "name": "model.layers.12.input_layernorm.weight",
1169
+ "shape": [
1170
+ 2048
1171
+ ],
1172
+ "dtype": "float16",
1173
+ "format": "f32-to-bf16",
1174
+ "nbytes": 4096,
1175
+ "byteOffset": 30932992
1176
+ },
1177
+ {
1178
+ "name": "model.layers.12.post_attention_layernorm.weight",
1179
+ "shape": [
1180
+ 2048
1181
+ ],
1182
+ "dtype": "float16",
1183
+ "format": "f32-to-bf16",
1184
+ "nbytes": 4096,
1185
+ "byteOffset": 30937088
1186
+ }
1187
+ ],
1188
+ "md5sum": "60d9d5eed2b02fd0f12e0f2a85cdf1f6"
1189
+ },
1190
+ {
1191
+ "dataPath": "params_shard_41.bin",
1192
+ "format": "raw-shard",
1193
+ "nbytes": 45088768,
1194
+ "records": [
1195
+ {
1196
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
1197
+ "shape": [
1198
+ 11008,
1199
+ 2048
1200
+ ],
1201
+ "dtype": "float16",
1202
+ "format": "f32-to-bf16",
1203
+ "nbytes": 45088768,
1204
+ "byteOffset": 0
1205
+ }
1206
+ ],
1207
+ "md5sum": "9cbe6a6ac2ec3abf9902f88a2eb9a237"
1208
+ },
1209
+ {
1210
+ "dataPath": "params_shard_42.bin",
1211
+ "format": "raw-shard",
1212
+ "nbytes": 25165824,
1213
+ "records": [
1214
+ {
1215
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
1216
+ "shape": [
1217
+ 6144,
1218
+ 2048
1219
+ ],
1220
+ "dtype": "float16",
1221
+ "format": "f32-to-bf16",
1222
+ "nbytes": 25165824,
1223
+ "byteOffset": 0
1224
+ }
1225
+ ],
1226
+ "md5sum": "8b404bc762dd275697b79df4df22f9f9"
1227
+ },
1228
+ {
1229
+ "dataPath": "params_shard_43.bin",
1230
+ "format": "raw-shard",
1231
+ "nbytes": 30941184,
1232
+ "records": [
1233
+ {
1234
+ "name": "model.layers.13.self_attn.o_proj.weight",
1235
+ "shape": [
1236
+ 2048,
1237
+ 2048
1238
+ ],
1239
+ "dtype": "float16",
1240
+ "format": "f32-to-bf16",
1241
+ "nbytes": 8388608,
1242
+ "byteOffset": 0
1243
+ },
1244
+ {
1245
+ "name": "model.layers.13.mlp.down_proj.weight",
1246
+ "shape": [
1247
+ 2048,
1248
+ 5504
1249
+ ],
1250
+ "dtype": "float16",
1251
+ "format": "f32-to-bf16",
1252
+ "nbytes": 22544384,
1253
+ "byteOffset": 8388608
1254
+ },
1255
+ {
1256
+ "name": "model.layers.13.input_layernorm.weight",
1257
+ "shape": [
1258
+ 2048
1259
+ ],
1260
+ "dtype": "float16",
1261
+ "format": "f32-to-bf16",
1262
+ "nbytes": 4096,
1263
+ "byteOffset": 30932992
1264
+ },
1265
+ {
1266
+ "name": "model.layers.13.post_attention_layernorm.weight",
1267
+ "shape": [
1268
+ 2048
1269
+ ],
1270
+ "dtype": "float16",
1271
+ "format": "f32-to-bf16",
1272
+ "nbytes": 4096,
1273
+ "byteOffset": 30937088
1274
+ }
1275
+ ],
1276
+ "md5sum": "731b40cd63d88cdcbb65007b1fa72174"
1277
+ },
1278
+ {
1279
+ "dataPath": "params_shard_44.bin",
1280
+ "format": "raw-shard",
1281
+ "nbytes": 45088768,
1282
+ "records": [
1283
+ {
1284
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
1285
+ "shape": [
1286
+ 11008,
1287
+ 2048
1288
+ ],
1289
+ "dtype": "float16",
1290
+ "format": "f32-to-bf16",
1291
+ "nbytes": 45088768,
1292
+ "byteOffset": 0
1293
+ }
1294
+ ],
1295
+ "md5sum": "397b3dc04a75979d4866a04f237f49a8"
1296
+ },
1297
+ {
1298
+ "dataPath": "params_shard_45.bin",
1299
+ "format": "raw-shard",
1300
+ "nbytes": 25165824,
1301
+ "records": [
1302
+ {
1303
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
1304
+ "shape": [
1305
+ 6144,
1306
+ 2048
1307
+ ],
1308
+ "dtype": "float16",
1309
+ "format": "f32-to-bf16",
1310
+ "nbytes": 25165824,
1311
+ "byteOffset": 0
1312
+ }
1313
+ ],
1314
+ "md5sum": "cc8933cb7a1e5b2a08896c5fe577f405"
1315
+ },
1316
+ {
1317
+ "dataPath": "params_shard_46.bin",
1318
+ "format": "raw-shard",
1319
+ "nbytes": 30941184,
1320
+ "records": [
1321
+ {
1322
+ "name": "model.layers.14.self_attn.o_proj.weight",
1323
+ "shape": [
1324
+ 2048,
1325
+ 2048
1326
+ ],
1327
+ "dtype": "float16",
1328
+ "format": "f32-to-bf16",
1329
+ "nbytes": 8388608,
1330
+ "byteOffset": 0
1331
+ },
1332
+ {
1333
+ "name": "model.layers.14.mlp.down_proj.weight",
1334
+ "shape": [
1335
+ 2048,
1336
+ 5504
1337
+ ],
1338
+ "dtype": "float16",
1339
+ "format": "f32-to-bf16",
1340
+ "nbytes": 22544384,
1341
+ "byteOffset": 8388608
1342
+ },
1343
+ {
1344
+ "name": "model.layers.14.input_layernorm.weight",
1345
+ "shape": [
1346
+ 2048
1347
+ ],
1348
+ "dtype": "float16",
1349
+ "format": "f32-to-bf16",
1350
+ "nbytes": 4096,
1351
+ "byteOffset": 30932992
1352
+ },
1353
+ {
1354
+ "name": "model.layers.14.post_attention_layernorm.weight",
1355
+ "shape": [
1356
+ 2048
1357
+ ],
1358
+ "dtype": "float16",
1359
+ "format": "f32-to-bf16",
1360
+ "nbytes": 4096,
1361
+ "byteOffset": 30937088
1362
+ }
1363
+ ],
1364
+ "md5sum": "71c0be626a091a10f7535e124bdac1d5"
1365
+ },
1366
+ {
1367
+ "dataPath": "params_shard_47.bin",
1368
+ "format": "raw-shard",
1369
+ "nbytes": 45088768,
1370
+ "records": [
1371
+ {
1372
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
1373
+ "shape": [
1374
+ 11008,
1375
+ 2048
1376
+ ],
1377
+ "dtype": "float16",
1378
+ "format": "f32-to-bf16",
1379
+ "nbytes": 45088768,
1380
+ "byteOffset": 0
1381
+ }
1382
+ ],
1383
+ "md5sum": "f322f06b5d5403f2d34f38d4ca70b2bf"
1384
+ },
1385
+ {
1386
+ "dataPath": "params_shard_48.bin",
1387
+ "format": "raw-shard",
1388
+ "nbytes": 25165824,
1389
+ "records": [
1390
+ {
1391
+ "name": "model.layers.16.self_attn.qkv_proj.weight",
1392
+ "shape": [
1393
+ 6144,
1394
+ 2048
1395
+ ],
1396
+ "dtype": "float16",
1397
+ "format": "f32-to-bf16",
1398
+ "nbytes": 25165824,
1399
+ "byteOffset": 0
1400
+ }
1401
+ ],
1402
+ "md5sum": "9a01f6391a3122b069730a70ee48b125"
1403
+ },
1404
+ {
1405
+ "dataPath": "params_shard_49.bin",
1406
+ "format": "raw-shard",
1407
+ "nbytes": 30941184,
1408
+ "records": [
1409
+ {
1410
+ "name": "model.layers.15.self_attn.o_proj.weight",
1411
+ "shape": [
1412
+ 2048,
1413
+ 2048
1414
+ ],
1415
+ "dtype": "float16",
1416
+ "format": "f32-to-bf16",
1417
+ "nbytes": 8388608,
1418
+ "byteOffset": 0
1419
+ },
1420
+ {
1421
+ "name": "model.layers.15.mlp.down_proj.weight",
1422
+ "shape": [
1423
+ 2048,
1424
+ 5504
1425
+ ],
1426
+ "dtype": "float16",
1427
+ "format": "f32-to-bf16",
1428
+ "nbytes": 22544384,
1429
+ "byteOffset": 8388608
1430
+ },
1431
+ {
1432
+ "name": "model.layers.15.input_layernorm.weight",
1433
+ "shape": [
1434
+ 2048
1435
+ ],
1436
+ "dtype": "float16",
1437
+ "format": "f32-to-bf16",
1438
+ "nbytes": 4096,
1439
+ "byteOffset": 30932992
1440
+ },
1441
+ {
1442
+ "name": "model.layers.15.post_attention_layernorm.weight",
1443
+ "shape": [
1444
+ 2048
1445
+ ],
1446
+ "dtype": "float16",
1447
+ "format": "f32-to-bf16",
1448
+ "nbytes": 4096,
1449
+ "byteOffset": 30937088
1450
+ }
1451
+ ],
1452
+ "md5sum": "6ee9b8b140875057942405392bdadb60"
1453
+ },
1454
+ {
1455
+ "dataPath": "params_shard_50.bin",
1456
+ "format": "raw-shard",
1457
+ "nbytes": 45088768,
1458
+ "records": [
1459
+ {
1460
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
1461
+ "shape": [
1462
+ 11008,
1463
+ 2048
1464
+ ],
1465
+ "dtype": "float16",
1466
+ "format": "f32-to-bf16",
1467
+ "nbytes": 45088768,
1468
+ "byteOffset": 0
1469
+ }
1470
+ ],
1471
+ "md5sum": "ab47f53c868ec49a3a5002e90e673c29"
1472
+ },
1473
+ {
1474
+ "dataPath": "params_shard_51.bin",
1475
+ "format": "raw-shard",
1476
+ "nbytes": 25165824,
1477
+ "records": [
1478
+ {
1479
+ "name": "model.layers.17.self_attn.qkv_proj.weight",
1480
+ "shape": [
1481
+ 6144,
1482
+ 2048
1483
+ ],
1484
+ "dtype": "float16",
1485
+ "format": "f32-to-bf16",
1486
+ "nbytes": 25165824,
1487
+ "byteOffset": 0
1488
+ }
1489
+ ],
1490
+ "md5sum": "f4d03d837be7afe115242d5d6ab9f1fa"
1491
+ },
1492
+ {
1493
+ "dataPath": "params_shard_52.bin",
1494
+ "format": "raw-shard",
1495
+ "nbytes": 30941184,
1496
+ "records": [
1497
+ {
1498
+ "name": "model.layers.16.self_attn.o_proj.weight",
1499
+ "shape": [
1500
+ 2048,
1501
+ 2048
1502
+ ],
1503
+ "dtype": "float16",
1504
+ "format": "f32-to-bf16",
1505
+ "nbytes": 8388608,
1506
+ "byteOffset": 0
1507
+ },
1508
+ {
1509
+ "name": "model.layers.16.mlp.down_proj.weight",
1510
+ "shape": [
1511
+ 2048,
1512
+ 5504
1513
+ ],
1514
+ "dtype": "float16",
1515
+ "format": "f32-to-bf16",
1516
+ "nbytes": 22544384,
1517
+ "byteOffset": 8388608
1518
+ },
1519
+ {
1520
+ "name": "model.layers.16.input_layernorm.weight",
1521
+ "shape": [
1522
+ 2048
1523
+ ],
1524
+ "dtype": "float16",
1525
+ "format": "f32-to-bf16",
1526
+ "nbytes": 4096,
1527
+ "byteOffset": 30932992
1528
+ },
1529
+ {
1530
+ "name": "model.layers.16.post_attention_layernorm.weight",
1531
+ "shape": [
1532
+ 2048
1533
+ ],
1534
+ "dtype": "float16",
1535
+ "format": "f32-to-bf16",
1536
+ "nbytes": 4096,
1537
+ "byteOffset": 30937088
1538
+ }
1539
+ ],
1540
+ "md5sum": "89cd4701668a770f350c7fae062a1387"
1541
+ },
1542
+ {
1543
+ "dataPath": "params_shard_53.bin",
1544
+ "format": "raw-shard",
1545
+ "nbytes": 45088768,
1546
+ "records": [
1547
+ {
1548
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
1549
+ "shape": [
1550
+ 11008,
1551
+ 2048
1552
+ ],
1553
+ "dtype": "float16",
1554
+ "format": "f32-to-bf16",
1555
+ "nbytes": 45088768,
1556
+ "byteOffset": 0
1557
+ }
1558
+ ],
1559
+ "md5sum": "ccc462a320c802f427b378ec82814f22"
1560
+ },
1561
+ {
1562
+ "dataPath": "params_shard_54.bin",
1563
+ "format": "raw-shard",
1564
+ "nbytes": 25165824,
1565
+ "records": [
1566
+ {
1567
+ "name": "model.layers.18.self_attn.qkv_proj.weight",
1568
+ "shape": [
1569
+ 6144,
1570
+ 2048
1571
+ ],
1572
+ "dtype": "float16",
1573
+ "format": "f32-to-bf16",
1574
+ "nbytes": 25165824,
1575
+ "byteOffset": 0
1576
+ }
1577
+ ],
1578
+ "md5sum": "d462e5fa68344d246660bc20a60313df"
1579
+ },
1580
+ {
1581
+ "dataPath": "params_shard_55.bin",
1582
+ "format": "raw-shard",
1583
+ "nbytes": 30941184,
1584
+ "records": [
1585
+ {
1586
+ "name": "model.layers.17.self_attn.o_proj.weight",
1587
+ "shape": [
1588
+ 2048,
1589
+ 2048
1590
+ ],
1591
+ "dtype": "float16",
1592
+ "format": "f32-to-bf16",
1593
+ "nbytes": 8388608,
1594
+ "byteOffset": 0
1595
+ },
1596
+ {
1597
+ "name": "model.layers.17.mlp.down_proj.weight",
1598
+ "shape": [
1599
+ 2048,
1600
+ 5504
1601
+ ],
1602
+ "dtype": "float16",
1603
+ "format": "f32-to-bf16",
1604
+ "nbytes": 22544384,
1605
+ "byteOffset": 8388608
1606
+ },
1607
+ {
1608
+ "name": "model.layers.17.input_layernorm.weight",
1609
+ "shape": [
1610
+ 2048
1611
+ ],
1612
+ "dtype": "float16",
1613
+ "format": "f32-to-bf16",
1614
+ "nbytes": 4096,
1615
+ "byteOffset": 30932992
1616
+ },
1617
+ {
1618
+ "name": "model.layers.17.post_attention_layernorm.weight",
1619
+ "shape": [
1620
+ 2048
1621
+ ],
1622
+ "dtype": "float16",
1623
+ "format": "f32-to-bf16",
1624
+ "nbytes": 4096,
1625
+ "byteOffset": 30937088
1626
+ }
1627
+ ],
1628
+ "md5sum": "4c50c77ae659de0aa23f697b7b5d2460"
1629
+ },
1630
+ {
1631
+ "dataPath": "params_shard_56.bin",
1632
+ "format": "raw-shard",
1633
+ "nbytes": 45088768,
1634
+ "records": [
1635
+ {
1636
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
1637
+ "shape": [
1638
+ 11008,
1639
+ 2048
1640
+ ],
1641
+ "dtype": "float16",
1642
+ "format": "f32-to-bf16",
1643
+ "nbytes": 45088768,
1644
+ "byteOffset": 0
1645
+ }
1646
+ ],
1647
+ "md5sum": "fabe7dd8ded46df7fde7d6fd93cc9ce6"
1648
+ },
1649
+ {
1650
+ "dataPath": "params_shard_57.bin",
1651
+ "format": "raw-shard",
1652
+ "nbytes": 25165824,
1653
+ "records": [
1654
+ {
1655
+ "name": "model.layers.19.self_attn.qkv_proj.weight",
1656
+ "shape": [
1657
+ 6144,
1658
+ 2048
1659
+ ],
1660
+ "dtype": "float16",
1661
+ "format": "f32-to-bf16",
1662
+ "nbytes": 25165824,
1663
+ "byteOffset": 0
1664
+ }
1665
+ ],
1666
+ "md5sum": "a719eaa0e0a9ecf9d094cc6aeebea333"
1667
+ },
1668
+ {
1669
+ "dataPath": "params_shard_58.bin",
1670
+ "format": "raw-shard",
1671
+ "nbytes": 30941184,
1672
+ "records": [
1673
+ {
1674
+ "name": "model.layers.18.self_attn.o_proj.weight",
1675
+ "shape": [
1676
+ 2048,
1677
+ 2048
1678
+ ],
1679
+ "dtype": "float16",
1680
+ "format": "f32-to-bf16",
1681
+ "nbytes": 8388608,
1682
+ "byteOffset": 0
1683
+ },
1684
+ {
1685
+ "name": "model.layers.18.mlp.down_proj.weight",
1686
+ "shape": [
1687
+ 2048,
1688
+ 5504
1689
+ ],
1690
+ "dtype": "float16",
1691
+ "format": "f32-to-bf16",
1692
+ "nbytes": 22544384,
1693
+ "byteOffset": 8388608
1694
+ },
1695
+ {
1696
+ "name": "model.layers.18.input_layernorm.weight",
1697
+ "shape": [
1698
+ 2048
1699
+ ],
1700
+ "dtype": "float16",
1701
+ "format": "f32-to-bf16",
1702
+ "nbytes": 4096,
1703
+ "byteOffset": 30932992
1704
+ },
1705
+ {
1706
+ "name": "model.layers.18.post_attention_layernorm.weight",
1707
+ "shape": [
1708
+ 2048
1709
+ ],
1710
+ "dtype": "float16",
1711
+ "format": "f32-to-bf16",
1712
+ "nbytes": 4096,
1713
+ "byteOffset": 30937088
1714
+ }
1715
+ ],
1716
+ "md5sum": "95825020ecb32bf61ef3e1029db7f353"
1717
+ },
1718
+ {
1719
+ "dataPath": "params_shard_59.bin",
1720
+ "format": "raw-shard",
1721
+ "nbytes": 45088768,
1722
+ "records": [
1723
+ {
1724
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
1725
+ "shape": [
1726
+ 11008,
1727
+ 2048
1728
+ ],
1729
+ "dtype": "float16",
1730
+ "format": "f32-to-bf16",
1731
+ "nbytes": 45088768,
1732
+ "byteOffset": 0
1733
+ }
1734
+ ],
1735
+ "md5sum": "621f309124cedaa397b831e5f0f1cf5e"
1736
+ },
1737
+ {
1738
+ "dataPath": "params_shard_60.bin",
1739
+ "format": "raw-shard",
1740
+ "nbytes": 25165824,
1741
+ "records": [
1742
+ {
1743
+ "name": "model.layers.20.self_attn.qkv_proj.weight",
1744
+ "shape": [
1745
+ 6144,
1746
+ 2048
1747
+ ],
1748
+ "dtype": "float16",
1749
+ "format": "f32-to-bf16",
1750
+ "nbytes": 25165824,
1751
+ "byteOffset": 0
1752
+ }
1753
+ ],
1754
+ "md5sum": "3aae87d5b421d59211214a94b4000103"
1755
+ },
1756
+ {
1757
+ "dataPath": "params_shard_61.bin",
1758
+ "format": "raw-shard",
1759
+ "nbytes": 30941184,
1760
+ "records": [
1761
+ {
1762
+ "name": "model.layers.19.self_attn.o_proj.weight",
1763
+ "shape": [
1764
+ 2048,
1765
+ 2048
1766
+ ],
1767
+ "dtype": "float16",
1768
+ "format": "f32-to-bf16",
1769
+ "nbytes": 8388608,
1770
+ "byteOffset": 0
1771
+ },
1772
+ {
1773
+ "name": "model.layers.19.mlp.down_proj.weight",
1774
+ "shape": [
1775
+ 2048,
1776
+ 5504
1777
+ ],
1778
+ "dtype": "float16",
1779
+ "format": "f32-to-bf16",
1780
+ "nbytes": 22544384,
1781
+ "byteOffset": 8388608
1782
+ },
1783
+ {
1784
+ "name": "model.layers.19.input_layernorm.weight",
1785
+ "shape": [
1786
+ 2048
1787
+ ],
1788
+ "dtype": "float16",
1789
+ "format": "f32-to-bf16",
1790
+ "nbytes": 4096,
1791
+ "byteOffset": 30932992
1792
+ },
1793
+ {
1794
+ "name": "model.layers.19.post_attention_layernorm.weight",
1795
+ "shape": [
1796
+ 2048
1797
+ ],
1798
+ "dtype": "float16",
1799
+ "format": "f32-to-bf16",
1800
+ "nbytes": 4096,
1801
+ "byteOffset": 30937088
1802
+ }
1803
+ ],
1804
+ "md5sum": "f5b77c5932d669f4f4e6cf0faafd3c89"
1805
+ },
1806
+ {
1807
+ "dataPath": "params_shard_62.bin",
1808
+ "format": "raw-shard",
1809
+ "nbytes": 45088768,
1810
+ "records": [
1811
+ {
1812
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1813
+ "shape": [
1814
+ 11008,
1815
+ 2048
1816
+ ],
1817
+ "dtype": "float16",
1818
+ "format": "f32-to-bf16",
1819
+ "nbytes": 45088768,
1820
+ "byteOffset": 0
1821
+ }
1822
+ ],
1823
+ "md5sum": "b17e8a72ffe031972f0bbc41c71ae713"
1824
+ },
1825
+ {
1826
+ "dataPath": "params_shard_63.bin",
1827
+ "format": "raw-shard",
1828
+ "nbytes": 25165824,
1829
+ "records": [
1830
+ {
1831
+ "name": "model.layers.21.self_attn.qkv_proj.weight",
1832
+ "shape": [
1833
+ 6144,
1834
+ 2048
1835
+ ],
1836
+ "dtype": "float16",
1837
+ "format": "f32-to-bf16",
1838
+ "nbytes": 25165824,
1839
+ "byteOffset": 0
1840
+ }
1841
+ ],
1842
+ "md5sum": "404d21fc88e458be85b543de6ac7fc76"
1843
+ },
1844
+ {
1845
+ "dataPath": "params_shard_64.bin",
1846
+ "format": "raw-shard",
1847
+ "nbytes": 30941184,
1848
+ "records": [
1849
+ {
1850
+ "name": "model.layers.20.self_attn.o_proj.weight",
1851
+ "shape": [
1852
+ 2048,
1853
+ 2048
1854
+ ],
1855
+ "dtype": "float16",
1856
+ "format": "f32-to-bf16",
1857
+ "nbytes": 8388608,
1858
+ "byteOffset": 0
1859
+ },
1860
+ {
1861
+ "name": "model.layers.20.mlp.down_proj.weight",
1862
+ "shape": [
1863
+ 2048,
1864
+ 5504
1865
+ ],
1866
+ "dtype": "float16",
1867
+ "format": "f32-to-bf16",
1868
+ "nbytes": 22544384,
1869
+ "byteOffset": 8388608
1870
+ },
1871
+ {
1872
+ "name": "model.layers.20.input_layernorm.weight",
1873
+ "shape": [
1874
+ 2048
1875
+ ],
1876
+ "dtype": "float16",
1877
+ "format": "f32-to-bf16",
1878
+ "nbytes": 4096,
1879
+ "byteOffset": 30932992
1880
+ },
1881
+ {
1882
+ "name": "model.layers.20.post_attention_layernorm.weight",
1883
+ "shape": [
1884
+ 2048
1885
+ ],
1886
+ "dtype": "float16",
1887
+ "format": "f32-to-bf16",
1888
+ "nbytes": 4096,
1889
+ "byteOffset": 30937088
1890
+ }
1891
+ ],
1892
+ "md5sum": "d713ab10d732a2165e1cbad5180ed82b"
1893
+ },
1894
+ {
1895
+ "dataPath": "params_shard_65.bin",
1896
+ "format": "raw-shard",
1897
+ "nbytes": 45088768,
1898
+ "records": [
1899
+ {
1900
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1901
+ "shape": [
1902
+ 11008,
1903
+ 2048
1904
+ ],
1905
+ "dtype": "float16",
1906
+ "format": "f32-to-bf16",
1907
+ "nbytes": 45088768,
1908
+ "byteOffset": 0
1909
+ }
1910
+ ],
1911
+ "md5sum": "ddc577228115c8d700e146100af96530"
1912
+ },
1913
+ {
1914
+ "dataPath": "params_shard_66.bin",
1915
+ "format": "raw-shard",
1916
+ "nbytes": 25165824,
1917
+ "records": [
1918
+ {
1919
+ "name": "model.layers.22.self_attn.qkv_proj.weight",
1920
+ "shape": [
1921
+ 6144,
1922
+ 2048
1923
+ ],
1924
+ "dtype": "float16",
1925
+ "format": "f32-to-bf16",
1926
+ "nbytes": 25165824,
1927
+ "byteOffset": 0
1928
+ }
1929
+ ],
1930
+ "md5sum": "9afea93aa3ac17edc305aa2cd8f16fe5"
1931
+ },
1932
+ {
1933
+ "dataPath": "params_shard_67.bin",
1934
+ "format": "raw-shard",
1935
+ "nbytes": 30941184,
1936
+ "records": [
1937
+ {
1938
+ "name": "model.layers.21.self_attn.o_proj.weight",
1939
+ "shape": [
1940
+ 2048,
1941
+ 2048
1942
+ ],
1943
+ "dtype": "float16",
1944
+ "format": "f32-to-bf16",
1945
+ "nbytes": 8388608,
1946
+ "byteOffset": 0
1947
+ },
1948
+ {
1949
+ "name": "model.layers.21.mlp.down_proj.weight",
1950
+ "shape": [
1951
+ 2048,
1952
+ 5504
1953
+ ],
1954
+ "dtype": "float16",
1955
+ "format": "f32-to-bf16",
1956
+ "nbytes": 22544384,
1957
+ "byteOffset": 8388608
1958
+ },
1959
+ {
1960
+ "name": "model.layers.21.input_layernorm.weight",
1961
+ "shape": [
1962
+ 2048
1963
+ ],
1964
+ "dtype": "float16",
1965
+ "format": "f32-to-bf16",
1966
+ "nbytes": 4096,
1967
+ "byteOffset": 30932992
1968
+ },
1969
+ {
1970
+ "name": "model.layers.21.post_attention_layernorm.weight",
1971
+ "shape": [
1972
+ 2048
1973
+ ],
1974
+ "dtype": "float16",
1975
+ "format": "f32-to-bf16",
1976
+ "nbytes": 4096,
1977
+ "byteOffset": 30937088
1978
+ }
1979
+ ],
1980
+ "md5sum": "8b1a597873d1fc6155871575d6419e6b"
1981
+ },
1982
+ {
1983
+ "dataPath": "params_shard_68.bin",
1984
+ "format": "raw-shard",
1985
+ "nbytes": 45088768,
1986
+ "records": [
1987
+ {
1988
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1989
+ "shape": [
1990
+ 11008,
1991
+ 2048
1992
+ ],
1993
+ "dtype": "float16",
1994
+ "format": "f32-to-bf16",
1995
+ "nbytes": 45088768,
1996
+ "byteOffset": 0
1997
+ }
1998
+ ],
1999
+ "md5sum": "34e19f6dbfe82b2e56b4f9deb05d5da7"
2000
+ },
2001
+ {
2002
+ "dataPath": "params_shard_69.bin",
2003
+ "format": "raw-shard",
2004
+ "nbytes": 25165824,
2005
+ "records": [
2006
+ {
2007
+ "name": "model.layers.23.self_attn.qkv_proj.weight",
2008
+ "shape": [
2009
+ 6144,
2010
+ 2048
2011
+ ],
2012
+ "dtype": "float16",
2013
+ "format": "f32-to-bf16",
2014
+ "nbytes": 25165824,
2015
+ "byteOffset": 0
2016
+ }
2017
+ ],
2018
+ "md5sum": "53cabf917ecd982a1068cf13390d121a"
2019
+ },
2020
+ {
2021
+ "dataPath": "params_shard_70.bin",
2022
+ "format": "raw-shard",
2023
+ "nbytes": 30941184,
2024
+ "records": [
2025
+ {
2026
+ "name": "model.layers.22.self_attn.o_proj.weight",
2027
+ "shape": [
2028
+ 2048,
2029
+ 2048
2030
+ ],
2031
+ "dtype": "float16",
2032
+ "format": "f32-to-bf16",
2033
+ "nbytes": 8388608,
2034
+ "byteOffset": 0
2035
+ },
2036
+ {
2037
+ "name": "model.layers.22.mlp.down_proj.weight",
2038
+ "shape": [
2039
+ 2048,
2040
+ 5504
2041
+ ],
2042
+ "dtype": "float16",
2043
+ "format": "f32-to-bf16",
2044
+ "nbytes": 22544384,
2045
+ "byteOffset": 8388608
2046
+ },
2047
+ {
2048
+ "name": "model.layers.22.input_layernorm.weight",
2049
+ "shape": [
2050
+ 2048
2051
+ ],
2052
+ "dtype": "float16",
2053
+ "format": "f32-to-bf16",
2054
+ "nbytes": 4096,
2055
+ "byteOffset": 30932992
2056
+ },
2057
+ {
2058
+ "name": "model.layers.22.post_attention_layernorm.weight",
2059
+ "shape": [
2060
+ 2048
2061
+ ],
2062
+ "dtype": "float16",
2063
+ "format": "f32-to-bf16",
2064
+ "nbytes": 4096,
2065
+ "byteOffset": 30937088
2066
+ }
2067
+ ],
2068
+ "md5sum": "d862984cd4e8ea226193160b190f8827"
2069
+ },
2070
+ {
2071
+ "dataPath": "params_shard_71.bin",
2072
+ "format": "raw-shard",
2073
+ "nbytes": 45088768,
2074
+ "records": [
2075
+ {
2076
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
2077
+ "shape": [
2078
+ 11008,
2079
+ 2048
2080
+ ],
2081
+ "dtype": "float16",
2082
+ "format": "f32-to-bf16",
2083
+ "nbytes": 45088768,
2084
+ "byteOffset": 0
2085
+ }
2086
+ ],
2087
+ "md5sum": "95d1228e5ceb1f8a5f3618cf80882b89"
2088
+ },
2089
+ {
2090
+ "dataPath": "params_shard_72.bin",
2091
+ "format": "raw-shard",
2092
+ "nbytes": 131080192,
2093
+ "records": [
2094
+ {
2095
+ "name": "lm_head.weight",
2096
+ "shape": [
2097
+ 32002,
2098
+ 2048
2099
+ ],
2100
+ "dtype": "float16",
2101
+ "format": "f32-to-bf16",
2102
+ "nbytes": 131080192,
2103
+ "byteOffset": 0
2104
+ }
2105
+ ],
2106
+ "md5sum": "35f2374b89fa60891773b4fedacb8537"
2107
+ },
2108
+ {
2109
+ "dataPath": "params_shard_73.bin",
2110
+ "format": "raw-shard",
2111
+ "nbytes": 30945280,
2112
+ "records": [
2113
+ {
2114
+ "name": "model.layers.23.self_attn.o_proj.weight",
2115
+ "shape": [
2116
+ 2048,
2117
+ 2048
2118
+ ],
2119
+ "dtype": "float16",
2120
+ "format": "f32-to-bf16",
2121
+ "nbytes": 8388608,
2122
+ "byteOffset": 0
2123
+ },
2124
+ {
2125
+ "name": "model.layers.23.mlp.down_proj.weight",
2126
+ "shape": [
2127
+ 2048,
2128
+ 5504
2129
+ ],
2130
+ "dtype": "float16",
2131
+ "format": "f32-to-bf16",
2132
+ "nbytes": 22544384,
2133
+ "byteOffset": 8388608
2134
+ },
2135
+ {
2136
+ "name": "model.layers.23.input_layernorm.weight",
2137
+ "shape": [
2138
+ 2048
2139
+ ],
2140
+ "dtype": "float16",
2141
+ "format": "f32-to-bf16",
2142
+ "nbytes": 4096,
2143
+ "byteOffset": 30932992
2144
+ },
2145
+ {
2146
+ "name": "model.layers.23.post_attention_layernorm.weight",
2147
+ "shape": [
2148
+ 2048
2149
+ ],
2150
+ "dtype": "float16",
2151
+ "format": "f32-to-bf16",
2152
+ "nbytes": 4096,
2153
+ "byteOffset": 30937088
2154
+ },
2155
+ {
2156
+ "name": "model.norm.weight",
2157
+ "shape": [
2158
+ 2048
2159
+ ],
2160
+ "dtype": "float16",
2161
+ "format": "f32-to-bf16",
2162
+ "nbytes": 4096,
2163
+ "byteOffset": 30941184
2164
+ }
2165
+ ],
2166
+ "md5sum": "ddff1a7887347b8a3c9b8e4ca23e3e65"
2167
+ }
2168
+ ]
2169
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5000df03d5accdb70f466834f3281c87b8845d2ce8c8aeb99e9cfa0c008ff18
3
+ size 131080192
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae19c1cb2c970e6b83fb4c5bada6648f4914775bcd9e3fd021622db16917992
3
+ size 25165824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e517f0d4355626a86f43a974cce3bed0aeb2d7e2e7142ddf89ea4a86fa5f69
3
+ size 30941184
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296f824cb609b55181d90f86ef74834e3f20dfa8c756d48b6c528a38940bcb65
3
+ size 45088768
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229338e50e1bc5b4395d191a885b98a1f928dc6ffdc4cb5ba642ede21cfe5816
3
+ size 25165824
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:722f82c2e0dc3bd206c857f18e41a2796573f37b989c9aea808d7a0fe7cd7743
3
+ size 30941184
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:493f32a7d2b57a81f21d0e00b445568b840acba2ca1a2cb1f11d9400b622ac86
3
+ size 45088768
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5eda8157f35e0397597067110b022a95c92cc2bf8791a97b9b4f60db395e691
3
+ size 25165824
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3567ef8910a44849c2b90cfd3f418f5f801c93af57661275ceb426a8c7b1c3e6
3
+ size 30941184
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4664463d8c8b73762855e922e62241e9e88c4bfb2fbbcce17d4b4a624c101dfc
3
+ size 45088768
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf14a83816c05b05fd9b33b98e1eea50091d061a0990514072ccdb7b0d8706bd
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a44c1dffc7f8212381f13ade4c923bdc9a38f4afc828dab111aea59db5065313
3
+ size 30941184
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea843add38fa56f44eaec07afada02e6e91978573a564544c7a25af946108aa7
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9743ae0ba24941220ad4a1cb665904071144c132bae4aaa4d728022b9b91d26
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcf74178815bd13ffb22f3ee99636e197e078a36efe7da29b7e02e8b0235c6a
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31dbc0cc24631ca9740ed818c9627e67d1d3eaba25cddfff4746da0db26d5191
3
+ size 30941184
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c04cbec7d4e495f62a922178cc166c8c5ba00dd727bd57a6c96b2c618a56ce37
3
+ size 45088768
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26d5962120d84522021b645d5d08a20ebea73d4f2bf4e208761e0ebac8278e8
3
+ size 25165824
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3447b80ae180212ca0c7db5b5a1b303452ffe6877a484e1820cd7d639955db
3
+ size 30941184
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43c27e46cbda08b2692d5ba0b0683b2374e2774193a6993f96038d7f1650ff9
3
+ size 45088768
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4f64e75c5cd3fc34abbdddd93732757d6d26ad266d35ed1c716ef3869ea86b
3
+ size 25165824
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45f4d4714b52d3c56a7083b2e27e33c3e5fa0d270f7f96603362fc6c04260b1
3
+ size 30941184
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6da4c1b7d483aaa97b1909c8834378a8d2a4b3ec5435258dd326ffe960f6ed
3
+ size 45088768
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2332584be23d0741e8dd35a3b61b61517a6d504a0c869afbdff24e5aca3ff4fa
3
+ size 25165824
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76d64ec540d0f18c918039d34925879ac795da1cc5e733e7141a4ca4e93845f
3
+ size 25165824
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12de15a4ee0ed49178f65ae5f0fd78db08af5113b470df4f57c13626c231300e
3
+ size 30941184
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99944cc4f7dea41efe93706e2e107b08d4bbc7e6d515fecc5544c0a849bd06b6
3
+ size 45088768
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73f43257123ed0cc182e297bbc39e2d1f98311a67b79f2f6560fbb32724d943c
3
+ size 25165824
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e25cc8526939dffb7318526cf6cbbeaaf16177fba3471003a4d9ab8ad7f4a6c
3
+ size 30941184
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b90d1388aeeed06110d4874883398ca3a0ddf35ad98088788fca882c35f4254e
3
+ size 45088768
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fa5865468be924d5b94934b65b1e36d11a5206d460f680bfa0912b265c79c17
3
+ size 25165824
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c9aeb7eb6c57dc84c10ad711f6f0bd8aa39356431727ec819c5feed0bc4ef1
3
+ size 30941184
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a644201ea84a48edd1baa309f5faa0c10781fc7cec8ed3fc8d3ffe38e0d8dce
3
+ size 45088768
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9dc267055504ea759a6149677b57babb523df9755c19ad8aa075992acdd82e2
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1540fb2a9b7d7c42acc842e4e78b04dc9917d347eb00e6287f31a15ff27cbd6f
3
+ size 30941184
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208acc64cc375989ddda21c852b733e0225905f062d13f7ea9baa33c562035c1
3
+ size 30941184
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541c23fa55ac8d4fc9c8285c36c5aebe15341dd5f38ad2bf624fa4172e7c7380
3
+ size 45088768
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cc759d6a799e36dd140227aabe45fe258323082407adddcf1440a8b6927baa4
3
+ size 25165824
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c735f553a9de4aa8af237541844348ff257ced50ec056a87cde9a76faead6aa0
3
+ size 30941184
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec253a4352595e49830012f924566c2ce87155af56c0c51b9ed73f2aade4eba9
3
+ size 45088768
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35f22b1324ac480ac976a4321c59d3958b3a77d7b69ca95686b4b9babb5e29dd
3
+ size 25165824
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0933854b56d8a99e09856a8eeb5f18e8b5a01a30f8426939f74299bb189dc425
3
+ size 30941184
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020354b9b8ce8c7be88c5b8a9c6bc3f7ba10a661b360457430783c080b9ffecb
3
+ size 45088768
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf166cf733d7349b6a2d2eaaa6155da1ff46cae0514fa1633a946f4077ac4264
3
+ size 25165824
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a95361e809973245681c5b9b3afda5fd989487e2ab6b6b94fdb66e6d60a5f494
3
+ size 30941184
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b087d7d70f6562539ce557d0e2a734392a74c96d24fa7a75fe7b6d82e2b9fb5a
3
+ size 45088768
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6dd2c7129cdeafc80ecea8bdc582f95abd855c182a60be189bdc4034b787b6f
3
+ size 45088768
params_shard_51.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c45d94052939cfa1f5eb1b179d11f3a8922b3c68a3377015312b1a678b2dd8b
3
+ size 25165824