riczhou commited on
Commit
c9b85b7
1 Parent(s): ba7d461

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "</s>": 2,
3
+ "<s>": 1,
4
+ "<unk>": 0
5
+ }
mlc-chat-config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "internlm2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "vocab_size": 92544,
7
+ "hidden_size": 2048,
8
+ "num_hidden_layers": 24,
9
+ "num_attention_heads": 16,
10
+ "num_key_value_heads": 8,
11
+ "rms_norm_eps": 1e-05,
12
+ "intermediate_size": 8192,
13
+ "bias": false,
14
+ "use_cache": true,
15
+ "rope_theta": 1000000,
16
+ "pad_token_id": 2,
17
+ "bos_token_id": 1,
18
+ "eos_token_id": 2,
19
+ "context_window_size": 32768,
20
+ "prefill_chunk_size": 2048,
21
+ "tensor_parallel_shards": 1,
22
+ "max_batch_size": 80,
23
+ "head_dim": 128
24
+ },
25
+ "vocab_size": 92544,
26
+ "context_window_size": 32768,
27
+ "sliding_window_size": -1,
28
+ "prefill_chunk_size": 2048,
29
+ "attention_sink_size": -1,
30
+ "tensor_parallel_shards": 1,
31
+ "pipeline_parallel_stages": 1,
32
+ "temperature": 1.0,
33
+ "presence_penalty": 0.0,
34
+ "frequency_penalty": 0.0,
35
+ "repetition_penalty": 1.0,
36
+ "top_p": 1.0,
37
+ "tokenizer_files": [
38
+ "tokenizer.model",
39
+ "tokenizer.json",
40
+ "added_tokens.json",
41
+ "tokenizer_config.json"
42
+ ],
43
+ "tokenizer_info": {
44
+ "token_postproc_method": "byte_fallback",
45
+ "prepend_space_in_encode": false,
46
+ "strip_space_in_decode": false
47
+ },
48
+ "conv_template": {
49
+ "name": "chatml",
50
+ "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
51
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
52
+ "system_prefix_token_ids": null,
53
+ "add_role_after_system_message": true,
54
+ "roles": {
55
+ "user": "<|im_start|>user",
56
+ "assistant": "<|im_start|>assistant"
57
+ },
58
+ "role_templates": {
59
+ "user": "{user_message}",
60
+ "assistant": "{assistant_message}",
61
+ "tool": "{tool_message}"
62
+ },
63
+ "messages": [],
64
+ "seps": [
65
+ "<|im_end|>\n"
66
+ ],
67
+ "role_content_sep": "\n",
68
+ "role_empty_sep": "\n",
69
+ "stop_str": [
70
+ "<|im_end|>"
71
+ ],
72
+ "stop_token_ids": [
73
+ 2
74
+ ],
75
+ "function_string": "",
76
+ "use_function_calling": false
77
+ },
78
+ "pad_token_id": 2,
79
+ "bos_token_id": 1,
80
+ "eos_token_id": 2
81
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 147,
4
+ "ParamBytes": 3778220032.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 379060224,
12
+ "records": [
13
+ {
14
+ "name": "model.tok_embeddings.weight",
15
+ "shape": [
16
+ 92544,
17
+ 2048
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 379060224,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "c8f6d0d0c5f58f58b02af872f8519b94"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 67108864,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.feed_forward.gate_up_proj.weight",
34
+ "shape": [
35
+ 16384,
36
+ 2048
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 67108864,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "e9e0ea543bb67a206b5d53f23b57a969"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 33554432,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.feed_forward.w2.weight",
53
+ "shape": [
54
+ 2048,
55
+ 8192
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 33554432,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "70cbda20b2adbebe5c23af69cd4e31ed"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 16777216,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.1.attention.wqkv.weight",
72
+ "shape": [
73
+ 4096,
74
+ 2048
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 16777216,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "fed4a1284c4f23d3d9abca686cbcbf7b"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 25174016,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.0.attention.wqkv.weight",
91
+ "shape": [
92
+ 4096,
93
+ 2048
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 16777216,
98
+ "byteOffset": 0
99
+ },
100
+ {
101
+ "name": "model.layers.0.attention.wo.weight",
102
+ "shape": [
103
+ 2048,
104
+ 2048
105
+ ],
106
+ "dtype": "float16",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 8388608,
109
+ "byteOffset": 16777216
110
+ },
111
+ {
112
+ "name": "model.layers.0.attention_norm.weight",
113
+ "shape": [
114
+ 2048
115
+ ],
116
+ "dtype": "float16",
117
+ "format": "f32-to-bf16",
118
+ "nbytes": 4096,
119
+ "byteOffset": 25165824
120
+ },
121
+ {
122
+ "name": "model.layers.0.ffn_norm.weight",
123
+ "shape": [
124
+ 2048
125
+ ],
126
+ "dtype": "float16",
127
+ "format": "f32-to-bf16",
128
+ "nbytes": 4096,
129
+ "byteOffset": 25169920
130
+ }
131
+ ],
132
+ "md5sum": "028257deb68100714c861c2561a92894"
133
+ },
134
+ {
135
+ "dataPath": "params_shard_5.bin",
136
+ "format": "raw-shard",
137
+ "nbytes": 67108864,
138
+ "records": [
139
+ {
140
+ "name": "model.layers.1.feed_forward.gate_up_proj.weight",
141
+ "shape": [
142
+ 16384,
143
+ 2048
144
+ ],
145
+ "dtype": "float16",
146
+ "format": "f32-to-bf16",
147
+ "nbytes": 67108864,
148
+ "byteOffset": 0
149
+ }
150
+ ],
151
+ "md5sum": "ed9044dffe359958caed045919b36188"
152
+ },
153
+ {
154
+ "dataPath": "params_shard_6.bin",
155
+ "format": "raw-shard",
156
+ "nbytes": 33554432,
157
+ "records": [
158
+ {
159
+ "name": "model.layers.1.feed_forward.w2.weight",
160
+ "shape": [
161
+ 2048,
162
+ 8192
163
+ ],
164
+ "dtype": "float16",
165
+ "format": "f32-to-bf16",
166
+ "nbytes": 33554432,
167
+ "byteOffset": 0
168
+ }
169
+ ],
170
+ "md5sum": "682f08a479bbeacd53b109382cbf4182"
171
+ },
172
+ {
173
+ "dataPath": "params_shard_7.bin",
174
+ "format": "raw-shard",
175
+ "nbytes": 25174016,
176
+ "records": [
177
+ {
178
+ "name": "model.layers.1.attention.wo.weight",
179
+ "shape": [
180
+ 2048,
181
+ 2048
182
+ ],
183
+ "dtype": "float16",
184
+ "format": "f32-to-bf16",
185
+ "nbytes": 8388608,
186
+ "byteOffset": 0
187
+ },
188
+ {
189
+ "name": "model.layers.1.attention_norm.weight",
190
+ "shape": [
191
+ 2048
192
+ ],
193
+ "dtype": "float16",
194
+ "format": "f32-to-bf16",
195
+ "nbytes": 4096,
196
+ "byteOffset": 8388608
197
+ },
198
+ {
199
+ "name": "model.layers.1.ffn_norm.weight",
200
+ "shape": [
201
+ 2048
202
+ ],
203
+ "dtype": "float16",
204
+ "format": "f32-to-bf16",
205
+ "nbytes": 4096,
206
+ "byteOffset": 8392704
207
+ },
208
+ {
209
+ "name": "model.layers.2.attention.wqkv.weight",
210
+ "shape": [
211
+ 4096,
212
+ 2048
213
+ ],
214
+ "dtype": "float16",
215
+ "format": "f32-to-bf16",
216
+ "nbytes": 16777216,
217
+ "byteOffset": 8396800
218
+ }
219
+ ],
220
+ "md5sum": "e8b9621a13bc64aad4377f5300cdd68a"
221
+ },
222
+ {
223
+ "dataPath": "params_shard_8.bin",
224
+ "format": "raw-shard",
225
+ "nbytes": 67108864,
226
+ "records": [
227
+ {
228
+ "name": "model.layers.2.feed_forward.gate_up_proj.weight",
229
+ "shape": [
230
+ 16384,
231
+ 2048
232
+ ],
233
+ "dtype": "float16",
234
+ "format": "f32-to-bf16",
235
+ "nbytes": 67108864,
236
+ "byteOffset": 0
237
+ }
238
+ ],
239
+ "md5sum": "23f4d64d37703ceeb250249e9cbf574a"
240
+ },
241
+ {
242
+ "dataPath": "params_shard_9.bin",
243
+ "format": "raw-shard",
244
+ "nbytes": 33554432,
245
+ "records": [
246
+ {
247
+ "name": "model.layers.2.feed_forward.w2.weight",
248
+ "shape": [
249
+ 2048,
250
+ 8192
251
+ ],
252
+ "dtype": "float16",
253
+ "format": "f32-to-bf16",
254
+ "nbytes": 33554432,
255
+ "byteOffset": 0
256
+ }
257
+ ],
258
+ "md5sum": "8b9163b046b254bcae6b6ce69e5367d5"
259
+ },
260
+ {
261
+ "dataPath": "params_shard_10.bin",
262
+ "format": "raw-shard",
263
+ "nbytes": 25174016,
264
+ "records": [
265
+ {
266
+ "name": "model.layers.2.attention.wo.weight",
267
+ "shape": [
268
+ 2048,
269
+ 2048
270
+ ],
271
+ "dtype": "float16",
272
+ "format": "f32-to-bf16",
273
+ "nbytes": 8388608,
274
+ "byteOffset": 0
275
+ },
276
+ {
277
+ "name": "model.layers.2.attention_norm.weight",
278
+ "shape": [
279
+ 2048
280
+ ],
281
+ "dtype": "float16",
282
+ "format": "f32-to-bf16",
283
+ "nbytes": 4096,
284
+ "byteOffset": 8388608
285
+ },
286
+ {
287
+ "name": "model.layers.2.ffn_norm.weight",
288
+ "shape": [
289
+ 2048
290
+ ],
291
+ "dtype": "float16",
292
+ "format": "f32-to-bf16",
293
+ "nbytes": 4096,
294
+ "byteOffset": 8392704
295
+ },
296
+ {
297
+ "name": "model.layers.3.attention.wqkv.weight",
298
+ "shape": [
299
+ 4096,
300
+ 2048
301
+ ],
302
+ "dtype": "float16",
303
+ "format": "f32-to-bf16",
304
+ "nbytes": 16777216,
305
+ "byteOffset": 8396800
306
+ }
307
+ ],
308
+ "md5sum": "77d70dee602590a2472593b4b08ac350"
309
+ },
310
+ {
311
+ "dataPath": "params_shard_11.bin",
312
+ "format": "raw-shard",
313
+ "nbytes": 67108864,
314
+ "records": [
315
+ {
316
+ "name": "model.layers.3.feed_forward.gate_up_proj.weight",
317
+ "shape": [
318
+ 16384,
319
+ 2048
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 67108864,
324
+ "byteOffset": 0
325
+ }
326
+ ],
327
+ "md5sum": "d268c66d207209508f136c5f2dbad373"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_12.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 33554432,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.3.feed_forward.w2.weight",
336
+ "shape": [
337
+ 2048,
338
+ 8192
339
+ ],
340
+ "dtype": "float16",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 33554432,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "86561d0d004aa36e77c3eaf5531b6b20"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_13.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 25174016,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.3.attention.wo.weight",
355
+ "shape": [
356
+ 2048,
357
+ 2048
358
+ ],
359
+ "dtype": "float16",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 8388608,
362
+ "byteOffset": 0
363
+ },
364
+ {
365
+ "name": "model.layers.3.attention_norm.weight",
366
+ "shape": [
367
+ 2048
368
+ ],
369
+ "dtype": "float16",
370
+ "format": "f32-to-bf16",
371
+ "nbytes": 4096,
372
+ "byteOffset": 8388608
373
+ },
374
+ {
375
+ "name": "model.layers.3.ffn_norm.weight",
376
+ "shape": [
377
+ 2048
378
+ ],
379
+ "dtype": "float16",
380
+ "format": "f32-to-bf16",
381
+ "nbytes": 4096,
382
+ "byteOffset": 8392704
383
+ },
384
+ {
385
+ "name": "model.layers.4.attention.wqkv.weight",
386
+ "shape": [
387
+ 4096,
388
+ 2048
389
+ ],
390
+ "dtype": "float16",
391
+ "format": "f32-to-bf16",
392
+ "nbytes": 16777216,
393
+ "byteOffset": 8396800
394
+ }
395
+ ],
396
+ "md5sum": "532707f8381833e717c516147071ae5a"
397
+ },
398
+ {
399
+ "dataPath": "params_shard_14.bin",
400
+ "format": "raw-shard",
401
+ "nbytes": 67108864,
402
+ "records": [
403
+ {
404
+ "name": "model.layers.4.feed_forward.gate_up_proj.weight",
405
+ "shape": [
406
+ 16384,
407
+ 2048
408
+ ],
409
+ "dtype": "float16",
410
+ "format": "f32-to-bf16",
411
+ "nbytes": 67108864,
412
+ "byteOffset": 0
413
+ }
414
+ ],
415
+ "md5sum": "f639c7f948770c385f8a947e89f83080"
416
+ },
417
+ {
418
+ "dataPath": "params_shard_15.bin",
419
+ "format": "raw-shard",
420
+ "nbytes": 33554432,
421
+ "records": [
422
+ {
423
+ "name": "model.layers.4.feed_forward.w2.weight",
424
+ "shape": [
425
+ 2048,
426
+ 8192
427
+ ],
428
+ "dtype": "float16",
429
+ "format": "f32-to-bf16",
430
+ "nbytes": 33554432,
431
+ "byteOffset": 0
432
+ }
433
+ ],
434
+ "md5sum": "2aded3a887e46dcf74e0e59556092811"
435
+ },
436
+ {
437
+ "dataPath": "params_shard_16.bin",
438
+ "format": "raw-shard",
439
+ "nbytes": 25174016,
440
+ "records": [
441
+ {
442
+ "name": "model.layers.4.attention.wo.weight",
443
+ "shape": [
444
+ 2048,
445
+ 2048
446
+ ],
447
+ "dtype": "float16",
448
+ "format": "f32-to-bf16",
449
+ "nbytes": 8388608,
450
+ "byteOffset": 0
451
+ },
452
+ {
453
+ "name": "model.layers.4.attention_norm.weight",
454
+ "shape": [
455
+ 2048
456
+ ],
457
+ "dtype": "float16",
458
+ "format": "f32-to-bf16",
459
+ "nbytes": 4096,
460
+ "byteOffset": 8388608
461
+ },
462
+ {
463
+ "name": "model.layers.4.ffn_norm.weight",
464
+ "shape": [
465
+ 2048
466
+ ],
467
+ "dtype": "float16",
468
+ "format": "f32-to-bf16",
469
+ "nbytes": 4096,
470
+ "byteOffset": 8392704
471
+ },
472
+ {
473
+ "name": "model.layers.5.attention.wqkv.weight",
474
+ "shape": [
475
+ 4096,
476
+ 2048
477
+ ],
478
+ "dtype": "float16",
479
+ "format": "f32-to-bf16",
480
+ "nbytes": 16777216,
481
+ "byteOffset": 8396800
482
+ }
483
+ ],
484
+ "md5sum": "4d0f3ccc94d472357c2551878619478a"
485
+ },
486
+ {
487
+ "dataPath": "params_shard_17.bin",
488
+ "format": "raw-shard",
489
+ "nbytes": 67108864,
490
+ "records": [
491
+ {
492
+ "name": "model.layers.5.feed_forward.gate_up_proj.weight",
493
+ "shape": [
494
+ 16384,
495
+ 2048
496
+ ],
497
+ "dtype": "float16",
498
+ "format": "f32-to-bf16",
499
+ "nbytes": 67108864,
500
+ "byteOffset": 0
501
+ }
502
+ ],
503
+ "md5sum": "fde2165f55fcce1cd07339890ad0953c"
504
+ },
505
+ {
506
+ "dataPath": "params_shard_18.bin",
507
+ "format": "raw-shard",
508
+ "nbytes": 33554432,
509
+ "records": [
510
+ {
511
+ "name": "model.layers.5.feed_forward.w2.weight",
512
+ "shape": [
513
+ 2048,
514
+ 8192
515
+ ],
516
+ "dtype": "float16",
517
+ "format": "f32-to-bf16",
518
+ "nbytes": 33554432,
519
+ "byteOffset": 0
520
+ }
521
+ ],
522
+ "md5sum": "e952acf155de30cb5bd4418a2605c38e"
523
+ },
524
+ {
525
+ "dataPath": "params_shard_19.bin",
526
+ "format": "raw-shard",
527
+ "nbytes": 25174016,
528
+ "records": [
529
+ {
530
+ "name": "model.layers.5.attention.wo.weight",
531
+ "shape": [
532
+ 2048,
533
+ 2048
534
+ ],
535
+ "dtype": "float16",
536
+ "format": "f32-to-bf16",
537
+ "nbytes": 8388608,
538
+ "byteOffset": 0
539
+ },
540
+ {
541
+ "name": "model.layers.5.attention_norm.weight",
542
+ "shape": [
543
+ 2048
544
+ ],
545
+ "dtype": "float16",
546
+ "format": "f32-to-bf16",
547
+ "nbytes": 4096,
548
+ "byteOffset": 8388608
549
+ },
550
+ {
551
+ "name": "model.layers.5.ffn_norm.weight",
552
+ "shape": [
553
+ 2048
554
+ ],
555
+ "dtype": "float16",
556
+ "format": "f32-to-bf16",
557
+ "nbytes": 4096,
558
+ "byteOffset": 8392704
559
+ },
560
+ {
561
+ "name": "model.layers.6.attention.wqkv.weight",
562
+ "shape": [
563
+ 4096,
564
+ 2048
565
+ ],
566
+ "dtype": "float16",
567
+ "format": "f32-to-bf16",
568
+ "nbytes": 16777216,
569
+ "byteOffset": 8396800
570
+ }
571
+ ],
572
+ "md5sum": "57285e9b285c039e68ba931ff2c38abf"
573
+ },
574
+ {
575
+ "dataPath": "params_shard_20.bin",
576
+ "format": "raw-shard",
577
+ "nbytes": 67108864,
578
+ "records": [
579
+ {
580
+ "name": "model.layers.6.feed_forward.gate_up_proj.weight",
581
+ "shape": [
582
+ 16384,
583
+ 2048
584
+ ],
585
+ "dtype": "float16",
586
+ "format": "f32-to-bf16",
587
+ "nbytes": 67108864,
588
+ "byteOffset": 0
589
+ }
590
+ ],
591
+ "md5sum": "1ec9ea556ac6318d477f27a9ffccc1b4"
592
+ },
593
+ {
594
+ "dataPath": "params_shard_21.bin",
595
+ "format": "raw-shard",
596
+ "nbytes": 33554432,
597
+ "records": [
598
+ {
599
+ "name": "model.layers.6.feed_forward.w2.weight",
600
+ "shape": [
601
+ 2048,
602
+ 8192
603
+ ],
604
+ "dtype": "float16",
605
+ "format": "f32-to-bf16",
606
+ "nbytes": 33554432,
607
+ "byteOffset": 0
608
+ }
609
+ ],
610
+ "md5sum": "e4961b543fc0540d53355fadb5202997"
611
+ },
612
+ {
613
+ "dataPath": "params_shard_22.bin",
614
+ "format": "raw-shard",
615
+ "nbytes": 25174016,
616
+ "records": [
617
+ {
618
+ "name": "model.layers.6.attention.wo.weight",
619
+ "shape": [
620
+ 2048,
621
+ 2048
622
+ ],
623
+ "dtype": "float16",
624
+ "format": "f32-to-bf16",
625
+ "nbytes": 8388608,
626
+ "byteOffset": 0
627
+ },
628
+ {
629
+ "name": "model.layers.6.attention_norm.weight",
630
+ "shape": [
631
+ 2048
632
+ ],
633
+ "dtype": "float16",
634
+ "format": "f32-to-bf16",
635
+ "nbytes": 4096,
636
+ "byteOffset": 8388608
637
+ },
638
+ {
639
+ "name": "model.layers.6.ffn_norm.weight",
640
+ "shape": [
641
+ 2048
642
+ ],
643
+ "dtype": "float16",
644
+ "format": "f32-to-bf16",
645
+ "nbytes": 4096,
646
+ "byteOffset": 8392704
647
+ },
648
+ {
649
+ "name": "model.layers.7.attention.wqkv.weight",
650
+ "shape": [
651
+ 4096,
652
+ 2048
653
+ ],
654
+ "dtype": "float16",
655
+ "format": "f32-to-bf16",
656
+ "nbytes": 16777216,
657
+ "byteOffset": 8396800
658
+ }
659
+ ],
660
+ "md5sum": "01d1a8f29dff55d3e5106ae2fd2bf92b"
661
+ },
662
+ {
663
+ "dataPath": "params_shard_23.bin",
664
+ "format": "raw-shard",
665
+ "nbytes": 67108864,
666
+ "records": [
667
+ {
668
+ "name": "model.layers.7.feed_forward.gate_up_proj.weight",
669
+ "shape": [
670
+ 16384,
671
+ 2048
672
+ ],
673
+ "dtype": "float16",
674
+ "format": "f32-to-bf16",
675
+ "nbytes": 67108864,
676
+ "byteOffset": 0
677
+ }
678
+ ],
679
+ "md5sum": "ce15bfcf9d4c007ff198bbac8311d275"
680
+ },
681
+ {
682
+ "dataPath": "params_shard_24.bin",
683
+ "format": "raw-shard",
684
+ "nbytes": 33554432,
685
+ "records": [
686
+ {
687
+ "name": "model.layers.7.feed_forward.w2.weight",
688
+ "shape": [
689
+ 2048,
690
+ 8192
691
+ ],
692
+ "dtype": "float16",
693
+ "format": "f32-to-bf16",
694
+ "nbytes": 33554432,
695
+ "byteOffset": 0
696
+ }
697
+ ],
698
+ "md5sum": "3b848263184d5f655c63131eaa9aa7c5"
699
+ },
700
+ {
701
+ "dataPath": "params_shard_25.bin",
702
+ "format": "raw-shard",
703
+ "nbytes": 25174016,
704
+ "records": [
705
+ {
706
+ "name": "model.layers.7.attention.wo.weight",
707
+ "shape": [
708
+ 2048,
709
+ 2048
710
+ ],
711
+ "dtype": "float16",
712
+ "format": "f32-to-bf16",
713
+ "nbytes": 8388608,
714
+ "byteOffset": 0
715
+ },
716
+ {
717
+ "name": "model.layers.7.attention_norm.weight",
718
+ "shape": [
719
+ 2048
720
+ ],
721
+ "dtype": "float16",
722
+ "format": "f32-to-bf16",
723
+ "nbytes": 4096,
724
+ "byteOffset": 8388608
725
+ },
726
+ {
727
+ "name": "model.layers.7.ffn_norm.weight",
728
+ "shape": [
729
+ 2048
730
+ ],
731
+ "dtype": "float16",
732
+ "format": "f32-to-bf16",
733
+ "nbytes": 4096,
734
+ "byteOffset": 8392704
735
+ },
736
+ {
737
+ "name": "model.layers.8.attention.wqkv.weight",
738
+ "shape": [
739
+ 4096,
740
+ 2048
741
+ ],
742
+ "dtype": "float16",
743
+ "format": "f32-to-bf16",
744
+ "nbytes": 16777216,
745
+ "byteOffset": 8396800
746
+ }
747
+ ],
748
+ "md5sum": "2e00fe5598e7974e3558325e16933b05"
749
+ },
750
+ {
751
+ "dataPath": "params_shard_26.bin",
752
+ "format": "raw-shard",
753
+ "nbytes": 67108864,
754
+ "records": [
755
+ {
756
+ "name": "model.layers.8.feed_forward.gate_up_proj.weight",
757
+ "shape": [
758
+ 16384,
759
+ 2048
760
+ ],
761
+ "dtype": "float16",
762
+ "format": "f32-to-bf16",
763
+ "nbytes": 67108864,
764
+ "byteOffset": 0
765
+ }
766
+ ],
767
+ "md5sum": "62d160d27fa806ee042a2d5d1a2d7f9c"
768
+ },
769
+ {
770
+ "dataPath": "params_shard_27.bin",
771
+ "format": "raw-shard",
772
+ "nbytes": 33554432,
773
+ "records": [
774
+ {
775
+ "name": "model.layers.8.feed_forward.w2.weight",
776
+ "shape": [
777
+ 2048,
778
+ 8192
779
+ ],
780
+ "dtype": "float16",
781
+ "format": "f32-to-bf16",
782
+ "nbytes": 33554432,
783
+ "byteOffset": 0
784
+ }
785
+ ],
786
+ "md5sum": "4291c51e797a54603f2918f85bba0f28"
787
+ },
788
+ {
789
+ "dataPath": "params_shard_28.bin",
790
+ "format": "raw-shard",
791
+ "nbytes": 25174016,
792
+ "records": [
793
+ {
794
+ "name": "model.layers.8.attention.wo.weight",
795
+ "shape": [
796
+ 2048,
797
+ 2048
798
+ ],
799
+ "dtype": "float16",
800
+ "format": "f32-to-bf16",
801
+ "nbytes": 8388608,
802
+ "byteOffset": 0
803
+ },
804
+ {
805
+ "name": "model.layers.8.attention_norm.weight",
806
+ "shape": [
807
+ 2048
808
+ ],
809
+ "dtype": "float16",
810
+ "format": "f32-to-bf16",
811
+ "nbytes": 4096,
812
+ "byteOffset": 8388608
813
+ },
814
+ {
815
+ "name": "model.layers.8.ffn_norm.weight",
816
+ "shape": [
817
+ 2048
818
+ ],
819
+ "dtype": "float16",
820
+ "format": "f32-to-bf16",
821
+ "nbytes": 4096,
822
+ "byteOffset": 8392704
823
+ },
824
+ {
825
+ "name": "model.layers.9.attention.wqkv.weight",
826
+ "shape": [
827
+ 4096,
828
+ 2048
829
+ ],
830
+ "dtype": "float16",
831
+ "format": "f32-to-bf16",
832
+ "nbytes": 16777216,
833
+ "byteOffset": 8396800
834
+ }
835
+ ],
836
+ "md5sum": "1456d6dce1538c10ba035f37f028b6f9"
837
+ },
838
+ {
839
+ "dataPath": "params_shard_29.bin",
840
+ "format": "raw-shard",
841
+ "nbytes": 67108864,
842
+ "records": [
843
+ {
844
+ "name": "model.layers.9.feed_forward.gate_up_proj.weight",
845
+ "shape": [
846
+ 16384,
847
+ 2048
848
+ ],
849
+ "dtype": "float16",
850
+ "format": "f32-to-bf16",
851
+ "nbytes": 67108864,
852
+ "byteOffset": 0
853
+ }
854
+ ],
855
+ "md5sum": "e8423d504e5a1f07794fa6a19c6516c1"
856
+ },
857
+ {
858
+ "dataPath": "params_shard_30.bin",
859
+ "format": "raw-shard",
860
+ "nbytes": 33554432,
861
+ "records": [
862
+ {
863
+ "name": "model.layers.9.feed_forward.w2.weight",
864
+ "shape": [
865
+ 2048,
866
+ 8192
867
+ ],
868
+ "dtype": "float16",
869
+ "format": "f32-to-bf16",
870
+ "nbytes": 33554432,
871
+ "byteOffset": 0
872
+ }
873
+ ],
874
+ "md5sum": "8622070d4f372b7748b17907528d575a"
875
+ },
876
+ {
877
+ "dataPath": "params_shard_31.bin",
878
+ "format": "raw-shard",
879
+ "nbytes": 25174016,
880
+ "records": [
881
+ {
882
+ "name": "model.layers.9.attention.wo.weight",
883
+ "shape": [
884
+ 2048,
885
+ 2048
886
+ ],
887
+ "dtype": "float16",
888
+ "format": "f32-to-bf16",
889
+ "nbytes": 8388608,
890
+ "byteOffset": 0
891
+ },
892
+ {
893
+ "name": "model.layers.9.attention_norm.weight",
894
+ "shape": [
895
+ 2048
896
+ ],
897
+ "dtype": "float16",
898
+ "format": "f32-to-bf16",
899
+ "nbytes": 4096,
900
+ "byteOffset": 8388608
901
+ },
902
+ {
903
+ "name": "model.layers.9.ffn_norm.weight",
904
+ "shape": [
905
+ 2048
906
+ ],
907
+ "dtype": "float16",
908
+ "format": "f32-to-bf16",
909
+ "nbytes": 4096,
910
+ "byteOffset": 8392704
911
+ },
912
+ {
913
+ "name": "model.layers.10.attention.wqkv.weight",
914
+ "shape": [
915
+ 4096,
916
+ 2048
917
+ ],
918
+ "dtype": "float16",
919
+ "format": "f32-to-bf16",
920
+ "nbytes": 16777216,
921
+ "byteOffset": 8396800
922
+ }
923
+ ],
924
+ "md5sum": "004f0aeb8769bb482560a71def97dc0b"
925
+ },
926
+ {
927
+ "dataPath": "params_shard_32.bin",
928
+ "format": "raw-shard",
929
+ "nbytes": 67108864,
930
+ "records": [
931
+ {
932
+ "name": "model.layers.10.feed_forward.gate_up_proj.weight",
933
+ "shape": [
934
+ 16384,
935
+ 2048
936
+ ],
937
+ "dtype": "float16",
938
+ "format": "f32-to-bf16",
939
+ "nbytes": 67108864,
940
+ "byteOffset": 0
941
+ }
942
+ ],
943
+ "md5sum": "d4f80971ed13439e41517b52d4a6740e"
944
+ },
945
+ {
946
+ "dataPath": "params_shard_33.bin",
947
+ "format": "raw-shard",
948
+ "nbytes": 33554432,
949
+ "records": [
950
+ {
951
+ "name": "model.layers.10.feed_forward.w2.weight",
952
+ "shape": [
953
+ 2048,
954
+ 8192
955
+ ],
956
+ "dtype": "float16",
957
+ "format": "f32-to-bf16",
958
+ "nbytes": 33554432,
959
+ "byteOffset": 0
960
+ }
961
+ ],
962
+ "md5sum": "0d855105d7a746718a1b391637b5c3fb"
963
+ },
964
+ {
965
+ "dataPath": "params_shard_34.bin",
966
+ "format": "raw-shard",
967
+ "nbytes": 25174016,
968
+ "records": [
969
+ {
970
+ "name": "model.layers.10.attention.wo.weight",
971
+ "shape": [
972
+ 2048,
973
+ 2048
974
+ ],
975
+ "dtype": "float16",
976
+ "format": "f32-to-bf16",
977
+ "nbytes": 8388608,
978
+ "byteOffset": 0
979
+ },
980
+ {
981
+ "name": "model.layers.10.attention_norm.weight",
982
+ "shape": [
983
+ 2048
984
+ ],
985
+ "dtype": "float16",
986
+ "format": "f32-to-bf16",
987
+ "nbytes": 4096,
988
+ "byteOffset": 8388608
989
+ },
990
+ {
991
+ "name": "model.layers.10.ffn_norm.weight",
992
+ "shape": [
993
+ 2048
994
+ ],
995
+ "dtype": "float16",
996
+ "format": "f32-to-bf16",
997
+ "nbytes": 4096,
998
+ "byteOffset": 8392704
999
+ },
1000
+ {
1001
+ "name": "model.layers.11.attention.wqkv.weight",
1002
+ "shape": [
1003
+ 4096,
1004
+ 2048
1005
+ ],
1006
+ "dtype": "float16",
1007
+ "format": "f32-to-bf16",
1008
+ "nbytes": 16777216,
1009
+ "byteOffset": 8396800
1010
+ }
1011
+ ],
1012
+ "md5sum": "003e068d64993094f976be6fca62343a"
1013
+ },
1014
+ {
1015
+ "dataPath": "params_shard_35.bin",
1016
+ "format": "raw-shard",
1017
+ "nbytes": 67108864,
1018
+ "records": [
1019
+ {
1020
+ "name": "model.layers.11.feed_forward.gate_up_proj.weight",
1021
+ "shape": [
1022
+ 16384,
1023
+ 2048
1024
+ ],
1025
+ "dtype": "float16",
1026
+ "format": "f32-to-bf16",
1027
+ "nbytes": 67108864,
1028
+ "byteOffset": 0
1029
+ }
1030
+ ],
1031
+ "md5sum": "928d5b31f74ed716f8a94bee96eac465"
1032
+ },
1033
+ {
1034
+ "dataPath": "params_shard_36.bin",
1035
+ "format": "raw-shard",
1036
+ "nbytes": 33554432,
1037
+ "records": [
1038
+ {
1039
+ "name": "model.layers.11.feed_forward.w2.weight",
1040
+ "shape": [
1041
+ 2048,
1042
+ 8192
1043
+ ],
1044
+ "dtype": "float16",
1045
+ "format": "f32-to-bf16",
1046
+ "nbytes": 33554432,
1047
+ "byteOffset": 0
1048
+ }
1049
+ ],
1050
+ "md5sum": "ce1376dfdf59e8b06aca43aafbd95d70"
1051
+ },
1052
+ {
1053
+ "dataPath": "params_shard_37.bin",
1054
+ "format": "raw-shard",
1055
+ "nbytes": 25174016,
1056
+ "records": [
1057
+ {
1058
+ "name": "model.layers.11.attention.wo.weight",
1059
+ "shape": [
1060
+ 2048,
1061
+ 2048
1062
+ ],
1063
+ "dtype": "float16",
1064
+ "format": "f32-to-bf16",
1065
+ "nbytes": 8388608,
1066
+ "byteOffset": 0
1067
+ },
1068
+ {
1069
+ "name": "model.layers.11.attention_norm.weight",
1070
+ "shape": [
1071
+ 2048
1072
+ ],
1073
+ "dtype": "float16",
1074
+ "format": "f32-to-bf16",
1075
+ "nbytes": 4096,
1076
+ "byteOffset": 8388608
1077
+ },
1078
+ {
1079
+ "name": "model.layers.11.ffn_norm.weight",
1080
+ "shape": [
1081
+ 2048
1082
+ ],
1083
+ "dtype": "float16",
1084
+ "format": "f32-to-bf16",
1085
+ "nbytes": 4096,
1086
+ "byteOffset": 8392704
1087
+ },
1088
+ {
1089
+ "name": "model.layers.12.attention.wqkv.weight",
1090
+ "shape": [
1091
+ 4096,
1092
+ 2048
1093
+ ],
1094
+ "dtype": "float16",
1095
+ "format": "f32-to-bf16",
1096
+ "nbytes": 16777216,
1097
+ "byteOffset": 8396800
1098
+ }
1099
+ ],
1100
+ "md5sum": "002539101d27a72e45c3ef2fe6d35bca"
1101
+ },
1102
+ {
1103
+ "dataPath": "params_shard_38.bin",
1104
+ "format": "raw-shard",
1105
+ "nbytes": 67108864,
1106
+ "records": [
1107
+ {
1108
+ "name": "model.layers.12.feed_forward.gate_up_proj.weight",
1109
+ "shape": [
1110
+ 16384,
1111
+ 2048
1112
+ ],
1113
+ "dtype": "float16",
1114
+ "format": "f32-to-bf16",
1115
+ "nbytes": 67108864,
1116
+ "byteOffset": 0
1117
+ }
1118
+ ],
1119
+ "md5sum": "3994a0500df53882d6e8e7f58c709192"
1120
+ },
1121
+ {
1122
+ "dataPath": "params_shard_39.bin",
1123
+ "format": "raw-shard",
1124
+ "nbytes": 33554432,
1125
+ "records": [
1126
+ {
1127
+ "name": "model.layers.12.feed_forward.w2.weight",
1128
+ "shape": [
1129
+ 2048,
1130
+ 8192
1131
+ ],
1132
+ "dtype": "float16",
1133
+ "format": "f32-to-bf16",
1134
+ "nbytes": 33554432,
1135
+ "byteOffset": 0
1136
+ }
1137
+ ],
1138
+ "md5sum": "d9b0377aee207f260351bb243c039f1c"
1139
+ },
1140
+ {
1141
+ "dataPath": "params_shard_40.bin",
1142
+ "format": "raw-shard",
1143
+ "nbytes": 25174016,
1144
+ "records": [
1145
+ {
1146
+ "name": "model.layers.12.attention.wo.weight",
1147
+ "shape": [
1148
+ 2048,
1149
+ 2048
1150
+ ],
1151
+ "dtype": "float16",
1152
+ "format": "f32-to-bf16",
1153
+ "nbytes": 8388608,
1154
+ "byteOffset": 0
1155
+ },
1156
+ {
1157
+ "name": "model.layers.12.attention_norm.weight",
1158
+ "shape": [
1159
+ 2048
1160
+ ],
1161
+ "dtype": "float16",
1162
+ "format": "f32-to-bf16",
1163
+ "nbytes": 4096,
1164
+ "byteOffset": 8388608
1165
+ },
1166
+ {
1167
+ "name": "model.layers.12.ffn_norm.weight",
1168
+ "shape": [
1169
+ 2048
1170
+ ],
1171
+ "dtype": "float16",
1172
+ "format": "f32-to-bf16",
1173
+ "nbytes": 4096,
1174
+ "byteOffset": 8392704
1175
+ },
1176
+ {
1177
+ "name": "model.layers.13.attention.wqkv.weight",
1178
+ "shape": [
1179
+ 4096,
1180
+ 2048
1181
+ ],
1182
+ "dtype": "float16",
1183
+ "format": "f32-to-bf16",
1184
+ "nbytes": 16777216,
1185
+ "byteOffset": 8396800
1186
+ }
1187
+ ],
1188
+ "md5sum": "cca43e273ed1b32466550de2f273c9f2"
1189
+ },
1190
+ {
1191
+ "dataPath": "params_shard_41.bin",
1192
+ "format": "raw-shard",
1193
+ "nbytes": 67108864,
1194
+ "records": [
1195
+ {
1196
+ "name": "model.layers.13.feed_forward.gate_up_proj.weight",
1197
+ "shape": [
1198
+ 16384,
1199
+ 2048
1200
+ ],
1201
+ "dtype": "float16",
1202
+ "format": "f32-to-bf16",
1203
+ "nbytes": 67108864,
1204
+ "byteOffset": 0
1205
+ }
1206
+ ],
1207
+ "md5sum": "b1ab553bb92ee1bb7cf15820f5d94143"
1208
+ },
1209
+ {
1210
+ "dataPath": "params_shard_42.bin",
1211
+ "format": "raw-shard",
1212
+ "nbytes": 33554432,
1213
+ "records": [
1214
+ {
1215
+ "name": "model.layers.13.feed_forward.w2.weight",
1216
+ "shape": [
1217
+ 2048,
1218
+ 8192
1219
+ ],
1220
+ "dtype": "float16",
1221
+ "format": "f32-to-bf16",
1222
+ "nbytes": 33554432,
1223
+ "byteOffset": 0
1224
+ }
1225
+ ],
1226
+ "md5sum": "0c77b71d9d73936cf23ffc1c0c9b9399"
1227
+ },
1228
+ {
1229
+ "dataPath": "params_shard_43.bin",
1230
+ "format": "raw-shard",
1231
+ "nbytes": 25174016,
1232
+ "records": [
1233
+ {
1234
+ "name": "model.layers.13.attention.wo.weight",
1235
+ "shape": [
1236
+ 2048,
1237
+ 2048
1238
+ ],
1239
+ "dtype": "float16",
1240
+ "format": "f32-to-bf16",
1241
+ "nbytes": 8388608,
1242
+ "byteOffset": 0
1243
+ },
1244
+ {
1245
+ "name": "model.layers.13.attention_norm.weight",
1246
+ "shape": [
1247
+ 2048
1248
+ ],
1249
+ "dtype": "float16",
1250
+ "format": "f32-to-bf16",
1251
+ "nbytes": 4096,
1252
+ "byteOffset": 8388608
1253
+ },
1254
+ {
1255
+ "name": "model.layers.13.ffn_norm.weight",
1256
+ "shape": [
1257
+ 2048
1258
+ ],
1259
+ "dtype": "float16",
1260
+ "format": "f32-to-bf16",
1261
+ "nbytes": 4096,
1262
+ "byteOffset": 8392704
1263
+ },
1264
+ {
1265
+ "name": "model.layers.14.attention.wqkv.weight",
1266
+ "shape": [
1267
+ 4096,
1268
+ 2048
1269
+ ],
1270
+ "dtype": "float16",
1271
+ "format": "f32-to-bf16",
1272
+ "nbytes": 16777216,
1273
+ "byteOffset": 8396800
1274
+ }
1275
+ ],
1276
+ "md5sum": "f4b038503aa58e5641edf4587a862307"
1277
+ },
1278
+ {
1279
+ "dataPath": "params_shard_44.bin",
1280
+ "format": "raw-shard",
1281
+ "nbytes": 67108864,
1282
+ "records": [
1283
+ {
1284
+ "name": "model.layers.14.feed_forward.gate_up_proj.weight",
1285
+ "shape": [
1286
+ 16384,
1287
+ 2048
1288
+ ],
1289
+ "dtype": "float16",
1290
+ "format": "f32-to-bf16",
1291
+ "nbytes": 67108864,
1292
+ "byteOffset": 0
1293
+ }
1294
+ ],
1295
+ "md5sum": "fc95094dd77fd22b71cc741ca2a23646"
1296
+ },
1297
+ {
1298
+ "dataPath": "params_shard_45.bin",
1299
+ "format": "raw-shard",
1300
+ "nbytes": 33554432,
1301
+ "records": [
1302
+ {
1303
+ "name": "model.layers.14.feed_forward.w2.weight",
1304
+ "shape": [
1305
+ 2048,
1306
+ 8192
1307
+ ],
1308
+ "dtype": "float16",
1309
+ "format": "f32-to-bf16",
1310
+ "nbytes": 33554432,
1311
+ "byteOffset": 0
1312
+ }
1313
+ ],
1314
+ "md5sum": "0779d46f31a7dbe1de153ed47078dba2"
1315
+ },
1316
+ {
1317
+ "dataPath": "params_shard_46.bin",
1318
+ "format": "raw-shard",
1319
+ "nbytes": 25174016,
1320
+ "records": [
1321
+ {
1322
+ "name": "model.layers.14.attention.wo.weight",
1323
+ "shape": [
1324
+ 2048,
1325
+ 2048
1326
+ ],
1327
+ "dtype": "float16",
1328
+ "format": "f32-to-bf16",
1329
+ "nbytes": 8388608,
1330
+ "byteOffset": 0
1331
+ },
1332
+ {
1333
+ "name": "model.layers.14.attention_norm.weight",
1334
+ "shape": [
1335
+ 2048
1336
+ ],
1337
+ "dtype": "float16",
1338
+ "format": "f32-to-bf16",
1339
+ "nbytes": 4096,
1340
+ "byteOffset": 8388608
1341
+ },
1342
+ {
1343
+ "name": "model.layers.14.ffn_norm.weight",
1344
+ "shape": [
1345
+ 2048
1346
+ ],
1347
+ "dtype": "float16",
1348
+ "format": "f32-to-bf16",
1349
+ "nbytes": 4096,
1350
+ "byteOffset": 8392704
1351
+ },
1352
+ {
1353
+ "name": "model.layers.15.attention.wqkv.weight",
1354
+ "shape": [
1355
+ 4096,
1356
+ 2048
1357
+ ],
1358
+ "dtype": "float16",
1359
+ "format": "f32-to-bf16",
1360
+ "nbytes": 16777216,
1361
+ "byteOffset": 8396800
1362
+ }
1363
+ ],
1364
+ "md5sum": "cb9ac262924e1f6e5f7afac1ce9dda54"
1365
+ },
1366
+ {
1367
+ "dataPath": "params_shard_47.bin",
1368
+ "format": "raw-shard",
1369
+ "nbytes": 67108864,
1370
+ "records": [
1371
+ {
1372
+ "name": "model.layers.15.feed_forward.gate_up_proj.weight",
1373
+ "shape": [
1374
+ 16384,
1375
+ 2048
1376
+ ],
1377
+ "dtype": "float16",
1378
+ "format": "f32-to-bf16",
1379
+ "nbytes": 67108864,
1380
+ "byteOffset": 0
1381
+ }
1382
+ ],
1383
+ "md5sum": "b21a9db197211f02e2453cd8aac1a5f9"
1384
+ },
1385
+ {
1386
+ "dataPath": "params_shard_48.bin",
1387
+ "format": "raw-shard",
1388
+ "nbytes": 33554432,
1389
+ "records": [
1390
+ {
1391
+ "name": "model.layers.15.feed_forward.w2.weight",
1392
+ "shape": [
1393
+ 2048,
1394
+ 8192
1395
+ ],
1396
+ "dtype": "float16",
1397
+ "format": "f32-to-bf16",
1398
+ "nbytes": 33554432,
1399
+ "byteOffset": 0
1400
+ }
1401
+ ],
1402
+ "md5sum": "66d68e0373d2d3dc8ed25e161d6d817e"
1403
+ },
1404
+ {
1405
+ "dataPath": "params_shard_49.bin",
1406
+ "format": "raw-shard",
1407
+ "nbytes": 25174016,
1408
+ "records": [
1409
+ {
1410
+ "name": "model.layers.15.attention.wo.weight",
1411
+ "shape": [
1412
+ 2048,
1413
+ 2048
1414
+ ],
1415
+ "dtype": "float16",
1416
+ "format": "f32-to-bf16",
1417
+ "nbytes": 8388608,
1418
+ "byteOffset": 0
1419
+ },
1420
+ {
1421
+ "name": "model.layers.15.attention_norm.weight",
1422
+ "shape": [
1423
+ 2048
1424
+ ],
1425
+ "dtype": "float16",
1426
+ "format": "f32-to-bf16",
1427
+ "nbytes": 4096,
1428
+ "byteOffset": 8388608
1429
+ },
1430
+ {
1431
+ "name": "model.layers.15.ffn_norm.weight",
1432
+ "shape": [
1433
+ 2048
1434
+ ],
1435
+ "dtype": "float16",
1436
+ "format": "f32-to-bf16",
1437
+ "nbytes": 4096,
1438
+ "byteOffset": 8392704
1439
+ },
1440
+ {
1441
+ "name": "model.layers.16.attention.wqkv.weight",
1442
+ "shape": [
1443
+ 4096,
1444
+ 2048
1445
+ ],
1446
+ "dtype": "float16",
1447
+ "format": "f32-to-bf16",
1448
+ "nbytes": 16777216,
1449
+ "byteOffset": 8396800
1450
+ }
1451
+ ],
1452
+ "md5sum": "7ea0c9c7f983448b8ae224c47f447198"
1453
+ },
1454
+ {
1455
+ "dataPath": "params_shard_50.bin",
1456
+ "format": "raw-shard",
1457
+ "nbytes": 67108864,
1458
+ "records": [
1459
+ {
1460
+ "name": "model.layers.16.feed_forward.gate_up_proj.weight",
1461
+ "shape": [
1462
+ 16384,
1463
+ 2048
1464
+ ],
1465
+ "dtype": "float16",
1466
+ "format": "f32-to-bf16",
1467
+ "nbytes": 67108864,
1468
+ "byteOffset": 0
1469
+ }
1470
+ ],
1471
+ "md5sum": "ba5262dce77d42c47e6c50632ba264ef"
1472
+ },
1473
+ {
1474
+ "dataPath": "params_shard_51.bin",
1475
+ "format": "raw-shard",
1476
+ "nbytes": 33554432,
1477
+ "records": [
1478
+ {
1479
+ "name": "model.layers.16.feed_forward.w2.weight",
1480
+ "shape": [
1481
+ 2048,
1482
+ 8192
1483
+ ],
1484
+ "dtype": "float16",
1485
+ "format": "f32-to-bf16",
1486
+ "nbytes": 33554432,
1487
+ "byteOffset": 0
1488
+ }
1489
+ ],
1490
+ "md5sum": "ef98e9f936328631b1e07249d359caca"
1491
+ },
1492
+ {
1493
+ "dataPath": "params_shard_52.bin",
1494
+ "format": "raw-shard",
1495
+ "nbytes": 25174016,
1496
+ "records": [
1497
+ {
1498
+ "name": "model.layers.16.attention.wo.weight",
1499
+ "shape": [
1500
+ 2048,
1501
+ 2048
1502
+ ],
1503
+ "dtype": "float16",
1504
+ "format": "f32-to-bf16",
1505
+ "nbytes": 8388608,
1506
+ "byteOffset": 0
1507
+ },
1508
+ {
1509
+ "name": "model.layers.16.attention_norm.weight",
1510
+ "shape": [
1511
+ 2048
1512
+ ],
1513
+ "dtype": "float16",
1514
+ "format": "f32-to-bf16",
1515
+ "nbytes": 4096,
1516
+ "byteOffset": 8388608
1517
+ },
1518
+ {
1519
+ "name": "model.layers.16.ffn_norm.weight",
1520
+ "shape": [
1521
+ 2048
1522
+ ],
1523
+ "dtype": "float16",
1524
+ "format": "f32-to-bf16",
1525
+ "nbytes": 4096,
1526
+ "byteOffset": 8392704
1527
+ },
1528
+ {
1529
+ "name": "model.layers.17.attention.wqkv.weight",
1530
+ "shape": [
1531
+ 4096,
1532
+ 2048
1533
+ ],
1534
+ "dtype": "float16",
1535
+ "format": "f32-to-bf16",
1536
+ "nbytes": 16777216,
1537
+ "byteOffset": 8396800
1538
+ }
1539
+ ],
1540
+ "md5sum": "bdc69580a1be1538f4b8860b1b095f4e"
1541
+ },
1542
+ {
1543
+ "dataPath": "params_shard_53.bin",
1544
+ "format": "raw-shard",
1545
+ "nbytes": 67108864,
1546
+ "records": [
1547
+ {
1548
+ "name": "model.layers.17.feed_forward.gate_up_proj.weight",
1549
+ "shape": [
1550
+ 16384,
1551
+ 2048
1552
+ ],
1553
+ "dtype": "float16",
1554
+ "format": "f32-to-bf16",
1555
+ "nbytes": 67108864,
1556
+ "byteOffset": 0
1557
+ }
1558
+ ],
1559
+ "md5sum": "01502631503072992ee7b6ba289f9607"
1560
+ },
1561
+ {
1562
+ "dataPath": "params_shard_54.bin",
1563
+ "format": "raw-shard",
1564
+ "nbytes": 33554432,
1565
+ "records": [
1566
+ {
1567
+ "name": "model.layers.17.feed_forward.w2.weight",
1568
+ "shape": [
1569
+ 2048,
1570
+ 8192
1571
+ ],
1572
+ "dtype": "float16",
1573
+ "format": "f32-to-bf16",
1574
+ "nbytes": 33554432,
1575
+ "byteOffset": 0
1576
+ }
1577
+ ],
1578
+ "md5sum": "49d9a61e33dcdf8a054187b1049eba40"
1579
+ },
1580
+ {
1581
+ "dataPath": "params_shard_55.bin",
1582
+ "format": "raw-shard",
1583
+ "nbytes": 25174016,
1584
+ "records": [
1585
+ {
1586
+ "name": "model.layers.17.attention.wo.weight",
1587
+ "shape": [
1588
+ 2048,
1589
+ 2048
1590
+ ],
1591
+ "dtype": "float16",
1592
+ "format": "f32-to-bf16",
1593
+ "nbytes": 8388608,
1594
+ "byteOffset": 0
1595
+ },
1596
+ {
1597
+ "name": "model.layers.17.attention_norm.weight",
1598
+ "shape": [
1599
+ 2048
1600
+ ],
1601
+ "dtype": "float16",
1602
+ "format": "f32-to-bf16",
1603
+ "nbytes": 4096,
1604
+ "byteOffset": 8388608
1605
+ },
1606
+ {
1607
+ "name": "model.layers.17.ffn_norm.weight",
1608
+ "shape": [
1609
+ 2048
1610
+ ],
1611
+ "dtype": "float16",
1612
+ "format": "f32-to-bf16",
1613
+ "nbytes": 4096,
1614
+ "byteOffset": 8392704
1615
+ },
1616
+ {
1617
+ "name": "model.layers.18.attention.wqkv.weight",
1618
+ "shape": [
1619
+ 4096,
1620
+ 2048
1621
+ ],
1622
+ "dtype": "float16",
1623
+ "format": "f32-to-bf16",
1624
+ "nbytes": 16777216,
1625
+ "byteOffset": 8396800
1626
+ }
1627
+ ],
1628
+ "md5sum": "fcf5b35d38ed1317876899d604989ff5"
1629
+ },
1630
+ {
1631
+ "dataPath": "params_shard_56.bin",
1632
+ "format": "raw-shard",
1633
+ "nbytes": 67108864,
1634
+ "records": [
1635
+ {
1636
+ "name": "model.layers.18.feed_forward.gate_up_proj.weight",
1637
+ "shape": [
1638
+ 16384,
1639
+ 2048
1640
+ ],
1641
+ "dtype": "float16",
1642
+ "format": "f32-to-bf16",
1643
+ "nbytes": 67108864,
1644
+ "byteOffset": 0
1645
+ }
1646
+ ],
1647
+ "md5sum": "a2e5924cf0ac9933abc7e76a5907ecc8"
1648
+ },
1649
+ {
1650
+ "dataPath": "params_shard_57.bin",
1651
+ "format": "raw-shard",
1652
+ "nbytes": 33554432,
1653
+ "records": [
1654
+ {
1655
+ "name": "model.layers.18.feed_forward.w2.weight",
1656
+ "shape": [
1657
+ 2048,
1658
+ 8192
1659
+ ],
1660
+ "dtype": "float16",
1661
+ "format": "f32-to-bf16",
1662
+ "nbytes": 33554432,
1663
+ "byteOffset": 0
1664
+ }
1665
+ ],
1666
+ "md5sum": "283384e32b930f44d91f41786ce8766d"
1667
+ },
1668
+ {
1669
+ "dataPath": "params_shard_58.bin",
1670
+ "format": "raw-shard",
1671
+ "nbytes": 25174016,
1672
+ "records": [
1673
+ {
1674
+ "name": "model.layers.18.attention.wo.weight",
1675
+ "shape": [
1676
+ 2048,
1677
+ 2048
1678
+ ],
1679
+ "dtype": "float16",
1680
+ "format": "f32-to-bf16",
1681
+ "nbytes": 8388608,
1682
+ "byteOffset": 0
1683
+ },
1684
+ {
1685
+ "name": "model.layers.18.attention_norm.weight",
1686
+ "shape": [
1687
+ 2048
1688
+ ],
1689
+ "dtype": "float16",
1690
+ "format": "f32-to-bf16",
1691
+ "nbytes": 4096,
1692
+ "byteOffset": 8388608
1693
+ },
1694
+ {
1695
+ "name": "model.layers.18.ffn_norm.weight",
1696
+ "shape": [
1697
+ 2048
1698
+ ],
1699
+ "dtype": "float16",
1700
+ "format": "f32-to-bf16",
1701
+ "nbytes": 4096,
1702
+ "byteOffset": 8392704
1703
+ },
1704
+ {
1705
+ "name": "model.layers.19.attention.wqkv.weight",
1706
+ "shape": [
1707
+ 4096,
1708
+ 2048
1709
+ ],
1710
+ "dtype": "float16",
1711
+ "format": "f32-to-bf16",
1712
+ "nbytes": 16777216,
1713
+ "byteOffset": 8396800
1714
+ }
1715
+ ],
1716
+ "md5sum": "a34e69f01562020c50d436658b643204"
1717
+ },
1718
+ {
1719
+ "dataPath": "params_shard_59.bin",
1720
+ "format": "raw-shard",
1721
+ "nbytes": 67108864,
1722
+ "records": [
1723
+ {
1724
+ "name": "model.layers.19.feed_forward.gate_up_proj.weight",
1725
+ "shape": [
1726
+ 16384,
1727
+ 2048
1728
+ ],
1729
+ "dtype": "float16",
1730
+ "format": "f32-to-bf16",
1731
+ "nbytes": 67108864,
1732
+ "byteOffset": 0
1733
+ }
1734
+ ],
1735
+ "md5sum": "6ab434fab1ed92442c4d50568ddbffd4"
1736
+ },
1737
+ {
1738
+ "dataPath": "params_shard_60.bin",
1739
+ "format": "raw-shard",
1740
+ "nbytes": 33554432,
1741
+ "records": [
1742
+ {
1743
+ "name": "model.layers.19.feed_forward.w2.weight",
1744
+ "shape": [
1745
+ 2048,
1746
+ 8192
1747
+ ],
1748
+ "dtype": "float16",
1749
+ "format": "f32-to-bf16",
1750
+ "nbytes": 33554432,
1751
+ "byteOffset": 0
1752
+ }
1753
+ ],
1754
+ "md5sum": "fede46845f6e20a3387630cc52583457"
1755
+ },
1756
+ {
1757
+ "dataPath": "params_shard_61.bin",
1758
+ "format": "raw-shard",
1759
+ "nbytes": 25174016,
1760
+ "records": [
1761
+ {
1762
+ "name": "model.layers.19.attention.wo.weight",
1763
+ "shape": [
1764
+ 2048,
1765
+ 2048
1766
+ ],
1767
+ "dtype": "float16",
1768
+ "format": "f32-to-bf16",
1769
+ "nbytes": 8388608,
1770
+ "byteOffset": 0
1771
+ },
1772
+ {
1773
+ "name": "model.layers.19.attention_norm.weight",
1774
+ "shape": [
1775
+ 2048
1776
+ ],
1777
+ "dtype": "float16",
1778
+ "format": "f32-to-bf16",
1779
+ "nbytes": 4096,
1780
+ "byteOffset": 8388608
1781
+ },
1782
+ {
1783
+ "name": "model.layers.19.ffn_norm.weight",
1784
+ "shape": [
1785
+ 2048
1786
+ ],
1787
+ "dtype": "float16",
1788
+ "format": "f32-to-bf16",
1789
+ "nbytes": 4096,
1790
+ "byteOffset": 8392704
1791
+ },
1792
+ {
1793
+ "name": "model.layers.20.attention.wqkv.weight",
1794
+ "shape": [
1795
+ 4096,
1796
+ 2048
1797
+ ],
1798
+ "dtype": "float16",
1799
+ "format": "f32-to-bf16",
1800
+ "nbytes": 16777216,
1801
+ "byteOffset": 8396800
1802
+ }
1803
+ ],
1804
+ "md5sum": "ba467d7ddd7ec9238c6463091e80c9ab"
1805
+ },
1806
+ {
1807
+ "dataPath": "params_shard_62.bin",
1808
+ "format": "raw-shard",
1809
+ "nbytes": 67108864,
1810
+ "records": [
1811
+ {
1812
+ "name": "model.layers.20.feed_forward.gate_up_proj.weight",
1813
+ "shape": [
1814
+ 16384,
1815
+ 2048
1816
+ ],
1817
+ "dtype": "float16",
1818
+ "format": "f32-to-bf16",
1819
+ "nbytes": 67108864,
1820
+ "byteOffset": 0
1821
+ }
1822
+ ],
1823
+ "md5sum": "b331223ab22e514ba30311898a491a9f"
1824
+ },
1825
+ {
1826
+ "dataPath": "params_shard_63.bin",
1827
+ "format": "raw-shard",
1828
+ "nbytes": 33554432,
1829
+ "records": [
1830
+ {
1831
+ "name": "model.layers.20.feed_forward.w2.weight",
1832
+ "shape": [
1833
+ 2048,
1834
+ 8192
1835
+ ],
1836
+ "dtype": "float16",
1837
+ "format": "f32-to-bf16",
1838
+ "nbytes": 33554432,
1839
+ "byteOffset": 0
1840
+ }
1841
+ ],
1842
+ "md5sum": "922de5be8f5163e2437b95d8f7abfba3"
1843
+ },
1844
+ {
1845
+ "dataPath": "params_shard_64.bin",
1846
+ "format": "raw-shard",
1847
+ "nbytes": 25174016,
1848
+ "records": [
1849
+ {
1850
+ "name": "model.layers.20.attention.wo.weight",
1851
+ "shape": [
1852
+ 2048,
1853
+ 2048
1854
+ ],
1855
+ "dtype": "float16",
1856
+ "format": "f32-to-bf16",
1857
+ "nbytes": 8388608,
1858
+ "byteOffset": 0
1859
+ },
1860
+ {
1861
+ "name": "model.layers.20.attention_norm.weight",
1862
+ "shape": [
1863
+ 2048
1864
+ ],
1865
+ "dtype": "float16",
1866
+ "format": "f32-to-bf16",
1867
+ "nbytes": 4096,
1868
+ "byteOffset": 8388608
1869
+ },
1870
+ {
1871
+ "name": "model.layers.20.ffn_norm.weight",
1872
+ "shape": [
1873
+ 2048
1874
+ ],
1875
+ "dtype": "float16",
1876
+ "format": "f32-to-bf16",
1877
+ "nbytes": 4096,
1878
+ "byteOffset": 8392704
1879
+ },
1880
+ {
1881
+ "name": "model.layers.21.attention.wqkv.weight",
1882
+ "shape": [
1883
+ 4096,
1884
+ 2048
1885
+ ],
1886
+ "dtype": "float16",
1887
+ "format": "f32-to-bf16",
1888
+ "nbytes": 16777216,
1889
+ "byteOffset": 8396800
1890
+ }
1891
+ ],
1892
+ "md5sum": "7e81d6ec7d477fa616946139388ddc08"
1893
+ },
1894
+ {
1895
+ "dataPath": "params_shard_65.bin",
1896
+ "format": "raw-shard",
1897
+ "nbytes": 67108864,
1898
+ "records": [
1899
+ {
1900
+ "name": "model.layers.21.feed_forward.gate_up_proj.weight",
1901
+ "shape": [
1902
+ 16384,
1903
+ 2048
1904
+ ],
1905
+ "dtype": "float16",
1906
+ "format": "f32-to-bf16",
1907
+ "nbytes": 67108864,
1908
+ "byteOffset": 0
1909
+ }
1910
+ ],
1911
+ "md5sum": "82ac1735c6afb9be52902c700aea7a12"
1912
+ },
1913
+ {
1914
+ "dataPath": "params_shard_66.bin",
1915
+ "format": "raw-shard",
1916
+ "nbytes": 33554432,
1917
+ "records": [
1918
+ {
1919
+ "name": "model.layers.21.feed_forward.w2.weight",
1920
+ "shape": [
1921
+ 2048,
1922
+ 8192
1923
+ ],
1924
+ "dtype": "float16",
1925
+ "format": "f32-to-bf16",
1926
+ "nbytes": 33554432,
1927
+ "byteOffset": 0
1928
+ }
1929
+ ],
1930
+ "md5sum": "12ce1c76efe436f02473a58282bb9133"
1931
+ },
1932
+ {
1933
+ "dataPath": "params_shard_67.bin",
1934
+ "format": "raw-shard",
1935
+ "nbytes": 25174016,
1936
+ "records": [
1937
+ {
1938
+ "name": "model.layers.21.attention.wo.weight",
1939
+ "shape": [
1940
+ 2048,
1941
+ 2048
1942
+ ],
1943
+ "dtype": "float16",
1944
+ "format": "f32-to-bf16",
1945
+ "nbytes": 8388608,
1946
+ "byteOffset": 0
1947
+ },
1948
+ {
1949
+ "name": "model.layers.21.attention_norm.weight",
1950
+ "shape": [
1951
+ 2048
1952
+ ],
1953
+ "dtype": "float16",
1954
+ "format": "f32-to-bf16",
1955
+ "nbytes": 4096,
1956
+ "byteOffset": 8388608
1957
+ },
1958
+ {
1959
+ "name": "model.layers.21.ffn_norm.weight",
1960
+ "shape": [
1961
+ 2048
1962
+ ],
1963
+ "dtype": "float16",
1964
+ "format": "f32-to-bf16",
1965
+ "nbytes": 4096,
1966
+ "byteOffset": 8392704
1967
+ },
1968
+ {
1969
+ "name": "model.layers.22.attention.wqkv.weight",
1970
+ "shape": [
1971
+ 4096,
1972
+ 2048
1973
+ ],
1974
+ "dtype": "float16",
1975
+ "format": "f32-to-bf16",
1976
+ "nbytes": 16777216,
1977
+ "byteOffset": 8396800
1978
+ }
1979
+ ],
1980
+ "md5sum": "28c1604c347cab66d2927f2b459defed"
1981
+ },
1982
+ {
1983
+ "dataPath": "params_shard_68.bin",
1984
+ "format": "raw-shard",
1985
+ "nbytes": 67108864,
1986
+ "records": [
1987
+ {
1988
+ "name": "model.layers.22.feed_forward.gate_up_proj.weight",
1989
+ "shape": [
1990
+ 16384,
1991
+ 2048
1992
+ ],
1993
+ "dtype": "float16",
1994
+ "format": "f32-to-bf16",
1995
+ "nbytes": 67108864,
1996
+ "byteOffset": 0
1997
+ }
1998
+ ],
1999
+ "md5sum": "32a91a4d279fc93278dcfbfb15dc0d5d"
2000
+ },
2001
+ {
2002
+ "dataPath": "params_shard_69.bin",
2003
+ "format": "raw-shard",
2004
+ "nbytes": 33554432,
2005
+ "records": [
2006
+ {
2007
+ "name": "model.layers.22.feed_forward.w2.weight",
2008
+ "shape": [
2009
+ 2048,
2010
+ 8192
2011
+ ],
2012
+ "dtype": "float16",
2013
+ "format": "f32-to-bf16",
2014
+ "nbytes": 33554432,
2015
+ "byteOffset": 0
2016
+ }
2017
+ ],
2018
+ "md5sum": "9b551d0279e7ea93eb08cb68b29bf71a"
2019
+ },
2020
+ {
2021
+ "dataPath": "params_shard_70.bin",
2022
+ "format": "raw-shard",
2023
+ "nbytes": 25174016,
2024
+ "records": [
2025
+ {
2026
+ "name": "model.layers.22.attention.wo.weight",
2027
+ "shape": [
2028
+ 2048,
2029
+ 2048
2030
+ ],
2031
+ "dtype": "float16",
2032
+ "format": "f32-to-bf16",
2033
+ "nbytes": 8388608,
2034
+ "byteOffset": 0
2035
+ },
2036
+ {
2037
+ "name": "model.layers.22.attention_norm.weight",
2038
+ "shape": [
2039
+ 2048
2040
+ ],
2041
+ "dtype": "float16",
2042
+ "format": "f32-to-bf16",
2043
+ "nbytes": 4096,
2044
+ "byteOffset": 8388608
2045
+ },
2046
+ {
2047
+ "name": "model.layers.22.ffn_norm.weight",
2048
+ "shape": [
2049
+ 2048
2050
+ ],
2051
+ "dtype": "float16",
2052
+ "format": "f32-to-bf16",
2053
+ "nbytes": 4096,
2054
+ "byteOffset": 8392704
2055
+ },
2056
+ {
2057
+ "name": "model.layers.23.attention.wqkv.weight",
2058
+ "shape": [
2059
+ 4096,
2060
+ 2048
2061
+ ],
2062
+ "dtype": "float16",
2063
+ "format": "f32-to-bf16",
2064
+ "nbytes": 16777216,
2065
+ "byteOffset": 8396800
2066
+ }
2067
+ ],
2068
+ "md5sum": "56eb834474d9be0bb245e3fd3a012950"
2069
+ },
2070
+ {
2071
+ "dataPath": "params_shard_71.bin",
2072
+ "format": "raw-shard",
2073
+ "nbytes": 67108864,
2074
+ "records": [
2075
+ {
2076
+ "name": "model.layers.23.feed_forward.gate_up_proj.weight",
2077
+ "shape": [
2078
+ 16384,
2079
+ 2048
2080
+ ],
2081
+ "dtype": "float16",
2082
+ "format": "f32-to-bf16",
2083
+ "nbytes": 67108864,
2084
+ "byteOffset": 0
2085
+ }
2086
+ ],
2087
+ "md5sum": "ff6264f5e5d1279434606ce9d6041084"
2088
+ },
2089
+ {
2090
+ "dataPath": "params_shard_72.bin",
2091
+ "format": "raw-shard",
2092
+ "nbytes": 33554432,
2093
+ "records": [
2094
+ {
2095
+ "name": "model.layers.23.feed_forward.w2.weight",
2096
+ "shape": [
2097
+ 2048,
2098
+ 8192
2099
+ ],
2100
+ "dtype": "float16",
2101
+ "format": "f32-to-bf16",
2102
+ "nbytes": 33554432,
2103
+ "byteOffset": 0
2104
+ }
2105
+ ],
2106
+ "md5sum": "002fbfadeee34e731aa3393b7be3ef2c"
2107
+ },
2108
+ {
2109
+ "dataPath": "params_shard_73.bin",
2110
+ "format": "raw-shard",
2111
+ "nbytes": 379060224,
2112
+ "records": [
2113
+ {
2114
+ "name": "output.weight",
2115
+ "shape": [
2116
+ 92544,
2117
+ 2048
2118
+ ],
2119
+ "dtype": "float16",
2120
+ "format": "f32-to-bf16",
2121
+ "nbytes": 379060224,
2122
+ "byteOffset": 0
2123
+ }
2124
+ ],
2125
+ "md5sum": "44dadfc1e5ec5a1e920d17a727bb730a"
2126
+ },
2127
+ {
2128
+ "dataPath": "params_shard_74.bin",
2129
+ "format": "raw-shard",
2130
+ "nbytes": 8400896,
2131
+ "records": [
2132
+ {
2133
+ "name": "model.layers.23.attention.wo.weight",
2134
+ "shape": [
2135
+ 2048,
2136
+ 2048
2137
+ ],
2138
+ "dtype": "float16",
2139
+ "format": "f32-to-bf16",
2140
+ "nbytes": 8388608,
2141
+ "byteOffset": 0
2142
+ },
2143
+ {
2144
+ "name": "model.layers.23.attention_norm.weight",
2145
+ "shape": [
2146
+ 2048
2147
+ ],
2148
+ "dtype": "float16",
2149
+ "format": "f32-to-bf16",
2150
+ "nbytes": 4096,
2151
+ "byteOffset": 8388608
2152
+ },
2153
+ {
2154
+ "name": "model.layers.23.ffn_norm.weight",
2155
+ "shape": [
2156
+ 2048
2157
+ ],
2158
+ "dtype": "float16",
2159
+ "format": "f32-to-bf16",
2160
+ "nbytes": 4096,
2161
+ "byteOffset": 8392704
2162
+ },
2163
+ {
2164
+ "name": "model.norm.weight",
2165
+ "shape": [
2166
+ 2048
2167
+ ],
2168
+ "dtype": "float16",
2169
+ "format": "f32-to-bf16",
2170
+ "nbytes": 4096,
2171
+ "byteOffset": 8396800
2172
+ }
2173
+ ],
2174
+ "md5sum": "6e98b51d4e6355c1bc64a23646a583c9"
2175
+ }
2176
+ ]
2177
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49f9d9dcf2dcb48bd22592f90cec38567efa714ab58f11f80535b271bf1fbd9
3
+ size 379060224
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d21d57dd6d8b1e3467ebaa2be079c538d17192f397b1f88a756b4f02a31b875
3
+ size 67108864
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d86d33ed95f5163b10dfd62845600a131476506e39a4469d82c3dec8ccfeba54
3
+ size 25174016
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2930d16b44dcb0a1fe35f704a488ddf6304d9c9b4fca1f9c6cf80748f85ddb4
3
+ size 67108864
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4ac5d7c533a025978875a2277aed769d3fcfb498fb41d6c6a3bd7cbd46a9032
3
+ size 33554432
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ab432b208f9fa1dcee8ab6113e4d4fd0f0b3cb5ec64a79efccb770d251975b
3
+ size 25174016
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628d70f9d66900ee7735deebabf0e058c1825f4bac6a4539b5d6010bd8c46853
3
+ size 67108864
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c192a853f038f0d5c454de5fef6330713bcc76eed619bd0be4a98ad8df14002
3
+ size 33554432
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb3f5ae22c1840bf6b1b418189436cf548a077a54c5fd2314185e9b6af16ec7
3
+ size 25174016
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:437f0fc80d72d83ff05a62431e74df1e4e91576100744dca11a678db1a8a9ed3
3
+ size 67108864
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:addd6c5514981c69e6da439dccf4d14df2c9ccb021996a3005bfd9ad5cabc8dd
3
+ size 33554432
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df13cd35778cee954ba64868d93800aa84fdeb9eb0224f85512f7f29f7b7601
3
+ size 25174016
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e870a76fe393027bcff9eee410186158bd0213c69d5813cb8f7df7cec39ba871
3
+ size 33554432
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a446e393ea090a3cddad756e3a57d3229c0c60dbeb7352d97fe3cfd809de3497
3
+ size 67108864
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ceafc7eb06c405014940ba7ba4f6e04c3363153ada4ba25f8a321611d6c593
3
+ size 33554432
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:587a28b8611b4c451fe374d36f1e64bf8c9a4b10497ce34694e107c9691fe72a
3
+ size 25174016
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94372d91f4e2b7ea5015385d3d825aeba0fb5b13badb0234c11988cc24f831a9
3
+ size 67108864
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0018c3d3d955bcc71b8f86aca5d7a914196f785777065ad9898dccef166a5f
3
+ size 33554432
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96b9a95457353a80c90c473b9fabc521f8f3ee28d587205e4996ca0980289255
3
+ size 25174016
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e05ec67f35b61de6b65ba9b8c896aaf279250c7e75fef0e0c24d31eba686d5cc
3
+ size 67108864
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec7a660fdb8e00caa4e7d641a34ae0ef9c2dffe6a1deaeec7c1dbe1baeae9a2
3
+ size 33554432
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5acf4e5ec6a4abe91fad9d0a5b4654be31912cd114041cabc757862993c8de69
3
+ size 25174016
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad749c9eb8ab1e3ce53b2402d5454e1b8b65804fa8d22912723419b74a02abd
3
+ size 67108864
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba8052ca0b645553038c0d512e364bf9944bb815309afb0323a7a08709d29ad
3
+ size 16777216
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf3e604c92a19f8fd6c16443beab86b5eb4e5a3908259948e0bea6433be4c99
3
+ size 33554432
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a7287c68518b5429ff66b4a28f4b166b84099dc0123976b42eae1d48f8d56e
3
+ size 25174016
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a28afdc053434d67aa7c989112cc0984ee5a08b8f8c86209de4eaa645f038c4c
3
+ size 67108864
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0fcee843662361e5cbb7a8a614e6895752fb254ce6dcb83a9aa817ac9c216e
3
+ size 33554432
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc49137566669123cb53d7bf7e3d3a27a9ca1d7e92142c23ebab5f9cffa82079
3
+ size 25174016
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abfc032aafd9bb7a9bf880f40174c31860f4902b7bb00cee97ba5c10ef5706d2
3
+ size 67108864
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fcec2224060e05f352b565e9aa9f4f998255a9ed982fe61c57e814511da33e5
3
+ size 33554432
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b258b1c6fc67abc19f7032571ab024fa7ed295cc3cb3f154e146e040add05dd
3
+ size 25174016
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32fe07f17f50a87da813ee77737224c6f98e048320e73e9a65f751f8dea8326a
3
+ size 67108864
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:501321d4e1684cf7d9768ead1089deb770a2f77b124e77a82dc5dd8543f2dfd5
3
+ size 33554432
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd877c9b686c99a9b54e77b2f12661a2dc804f136a418226d768d0bd4e472991
3
+ size 25174016
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de043aeaac8f2975d6656bcf3827783d15aab24a23d2775a576e99c3c4f83b97
3
+ size 25174016
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be98840427961d733bfd17c51b005b2b45afa7b4ef0d333bf718be5025328f24
3
+ size 67108864
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2007cce8674b143359f07c185cde345f6a9d8277ca396300c1e48eabb3b0e7
3
+ size 33554432
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6375ed08507fd1945865f77fc55cbb749295828d3eaf6d2264b6a1dabdff85a0
3
+ size 25174016
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc760f3e22f5885573b62ea449251cebd35fbe8d68acccb3e619546413cbd25
3
+ size 67108864
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ec84300949bd2a9cb1f2907c04fe27e35de85ebd8a58789cec996af94b0018
3
+ size 33554432
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808dea10c8c97ac1ad05e1c0ecf98467f964200121700f374f1f6fcd2bab0c62
3
+ size 25174016
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab3472db2162ac212dd63215c223bab1c90c2ed8d7fd247634aaeb011d2617e
3
+ size 67108864
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724516333314171fc7c45a9789b6134c4eb3c0f9a26981d64296d294ef993184
3
+ size 33554432
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585396ec65812f7636757da5857f8bf1cfea42f7c75f839a2d406422458befb7
3
+ size 25174016
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1172051d4ea3fe02d48464f18d20783a396a3c38aac996c131111d7d90c9f0a6
3
+ size 67108864
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e192e193fc5d0a03bafe642dc41017f7bf6dd817fa2790a61d516760bcf9ef7
3
+ size 67108864