Upload paligemma_to_gguf.py

paligemma_to_gguf.py (ADDED, +446 -0)
import os
import json
import typing
import pathlib
import argparse

import numpy as np
import numpy.typing as npt

import gguf

from safetensors import safe_open


class SafetensorsIndexFile(typing.TypedDict):
    weight_map: typing.Dict[str, str]


class SafetensorsIndex:
    def __init__(self, index_file_path: str):
        directory = os.path.dirname(index_file_path)
        self.index = typing.cast(SafetensorsIndexFile, json.load(open(index_file_path)))
        self.weight_map = self.index["weight_map"]
        files = set(self.weight_map.values())
        self.tensors = {file: safe_open(os.path.join(directory, file), framework="np") for file in files}

    def get_tensor(self, key: str) -> npt.NDArray[np.float32]:
        return typing.cast(npt.NDArray[np.float32], self.tensors[self.weight_map[key]].get_tensor(key))  # type: ignore
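
# Note: SafetensorsIndex resolves each tensor name to the shard that holds it
# via the weight_map in model.safetensors.index.json, keeping one lazily-read
# safe_open handle per shard so tensors are only pulled from disk on demand.
# Hypothetical usage sketch (the tensor name is one PaliGemma actually uses):
#
#   tensors = SafetensorsIndex("model.safetensors.index.json")
#   w = tensors.get_tensor("multi_modal_projector.linear.weight")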

def k(raw_key: str, arch: str) -> str:
    return raw_key.format(arch=arch)

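# Heuristic adapted from llama.cpp's convert_hf_to_gguf.py: spot added tokens
# that behave like control tokens even when their tokenizer metadata does not
# mark them as "special".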
def does_token_look_special(token: typing.Union[str, bytes]) -> bool:
    if isinstance(token, (bytes, bytearray)):
        token_text = token.decode(encoding="utf-8")
    elif isinstance(token, memoryview):
        token_text = token.tobytes().decode(encoding="utf-8")
    else:
        token_text = token

    # Some models mark some added tokens which ought to be control tokens as not special.
    # (e.g. command-r, command-r-plus, deepseek-coder, gemma{,-2})
    seems_special = token_text in (
        "<pad>",  # deepseek-coder
        "<mask>", "<2mass>", "[@BOS@]",  # gemma{,-2}
    )

    seems_special = seems_special or (token_text.startswith("<|") and token_text.endswith("|>"))
    seems_special = seems_special or (token_text.startswith("<｜") and token_text.endswith("｜>"))  # deepseek-coder (fullwidth bars)

    # TODO: should these be marked as UNUSED instead? (maybe not)
    seems_special = seems_special or (token_text.startswith("<unused") and token_text.endswith(">"))  # gemma{,-2}

    return seems_special


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dir-model",
        required=True,
        help="path to the directory containing the model weights and tokenizer",
    )
    args = parser.parse_args()

    dir_model = pathlib.Path(args.dir_model)

    # set model name to folder name
    name = dir_model.name

    tensors = SafetensorsIndex((dir_model / "model.safetensors.index.json").as_posix())

    config = json.load(open(dir_model / "config.json"))
    text_config = {
        "max_position_embeddings": 8192,
        "rms_norm_eps": 1e-6,
        "head_dim": 256,
    }
    text_config.update(config["text_config"])
    vision_config = config["vision_config"]
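
    # Note: the defaults above (8192-token context, rms_norm_eps 1e-6,
    # head_dim 256) match PaliGemma's Gemma decoder and only apply when
    # config.json's "text_config" does not supply those fields itself.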

    preprocessor_config = json.load(open(dir_model / "preprocessor_config.json"))

    ### Vision model

    ftype = 1  # fp16

    fname_middle = "mmproj-"
    has_text_encoder = False
    has_llava_projector = True

    n_layers_clip = vision_config["num_hidden_layers"]

    fname_out = f"{name}-{fname_middle}f16.gguf"
    fout = gguf.GGUFWriter(fname_out, arch="clip")

    fout.add_bool("clip.has_text_encoder", has_text_encoder)
    fout.add_bool("clip.has_vision_encoder", True)
    fout.add_bool("clip.has_llava_projector", has_llava_projector)
    fout.add_file_type(ftype)  # fp16

    model_name = f"google/{name}"
    fout.add_name(model_name)
    fout.add_description("image encoder for " + model_name)
    fout.add_string("clip.projector_type", "mlp")

    image_size = vision_config.get("image_size", preprocessor_config["size"]["height"])

    # vision model hparams
    VISION = "clip.vision"
    fout.add_uint32("clip.vision.image_size", image_size)
    fout.add_uint32("clip.vision.patch_size", vision_config["patch_size"])
    fout.add_uint32(k(gguf.KEY_EMBEDDING_LENGTH, VISION), vision_config["hidden_size"])
    fout.add_uint32(k(gguf.KEY_FEED_FORWARD_LENGTH, VISION), vision_config["intermediate_size"])
    fout.add_uint32("clip.vision.projection_dim", vision_config["projection_dim"])
    fout.add_uint32(k(gguf.KEY_ATTENTION_HEAD_COUNT, VISION), vision_config["num_attention_heads"])
    fout.add_float32(k(gguf.KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
    fout.add_uint32(k(gguf.KEY_BLOCK_COUNT, VISION), n_layers_clip + 1)

    fout.add_array("clip.vision.image_mean", preprocessor_config["image_mean"])
    fout.add_array("clip.vision.image_std", preprocessor_config["image_std"])
    fout.add_bool("clip.use_gelu", vision_config["projector_hidden_act"] == "gelu")
    fout.add_float32("clip.embeddings_scale", 1.0 / (config["projection_dim"] ** 0.5))
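    # PaliGemma scales the projected image embeddings by 1/sqrt(projection_dim)
    # before handing them to the language model; storing the factor here lets
    # the runtime apply the same normalization.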

    # vision projection
    fout.add_tensor(
        "mm.0.weight",
        tensors.get_tensor("multi_modal_projector.linear.weight").astype(np.float16),
    )
    fout.add_tensor(
        "mm.0.bias",
        tensors.get_tensor("multi_modal_projector.linear.bias").astype(np.float32),
    )
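    # Convention used throughout (as in llama.cpp's CLIP converters): large 2-D
    # weight matrices are stored as f16, while biases and norm parameters are
    # kept in f32 for precision.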

    # encoder (siglip)
    fout.add_tensor(
        "v.position_embd.weight",
        tensors.get_tensor("vision_tower.vision_model.embeddings.position_embedding.weight").astype(np.float16),
    )
    fout.add_tensor(
        "v.patch_embd.weight",
        tensors.get_tensor("vision_tower.vision_model.embeddings.patch_embedding.weight")
        .reshape(vision_config["hidden_size"], 3, vision_config["patch_size"], vision_config["patch_size"])
        .astype(np.float16),
    )
    fout.add_tensor(
        "v.patch_embd.bias",
        tensors.get_tensor("vision_tower.vision_model.embeddings.patch_embedding.bias").astype(np.float32),
    )

    fout.add_tensor(
        "v.post_ln.weight",
        tensors.get_tensor("vision_tower.vision_model.post_layernorm.weight").astype(np.float32),
    )
    fout.add_tensor(
        "v.post_ln.bias",
        tensors.get_tensor("vision_tower.vision_model.post_layernorm.bias").astype(np.float32),
    )

    def blk_tensor(i: int, name: str):
        return tensors.get_tensor(
            f"vision_tower.vision_model.encoder.layers.{i}.{name}"
        )

    def add_tensor(blk_id: int, gguf_id: typing.Optional[int] = None):
        if gguf_id is None:
            gguf_id = blk_id

        q_w = blk_tensor(blk_id, "self_attn.q_proj.weight")
        k_w = blk_tensor(blk_id, "self_attn.k_proj.weight")
        v_w = blk_tensor(blk_id, "self_attn.v_proj.weight")
        q_b = blk_tensor(blk_id, "self_attn.q_proj.bias")
        k_b = blk_tensor(blk_id, "self_attn.k_proj.bias")
        v_b = blk_tensor(blk_id, "self_attn.v_proj.bias")

        fout.add_tensor(f"v.blk.{gguf_id}.attn_q.weight", q_w.astype(np.float16))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_q.bias", q_b.astype(np.float32))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_k.weight", k_w.astype(np.float16))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_k.bias", k_b.astype(np.float32))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_v.weight", v_w.astype(np.float16))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_v.bias", v_b.astype(np.float32))
        fout.add_tensor(
            f"v.blk.{gguf_id}.attn_out.weight",
            blk_tensor(blk_id, "self_attn.out_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.attn_out.bias",
            blk_tensor(blk_id, "self_attn.out_proj.bias").astype(np.float32),
        )

        fout.add_tensor(
            f"v.blk.{gguf_id}.ln1.weight",
            blk_tensor(blk_id, "layer_norm1.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ln1.bias",
            blk_tensor(blk_id, "layer_norm1.bias").astype(np.float32),
        )

        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_down.weight",
            blk_tensor(blk_id, "mlp.fc1.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_down.bias",
            blk_tensor(blk_id, "mlp.fc1.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_up.weight",
            blk_tensor(blk_id, "mlp.fc2.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_up.bias",
            blk_tensor(blk_id, "mlp.fc2.bias").astype(np.float32),
        )

        fout.add_tensor(
            f"v.blk.{gguf_id}.ln2.weight",
            blk_tensor(blk_id, "layer_norm2.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ln2.bias",
            blk_tensor(blk_id, "layer_norm2.bias").astype(np.float32),
        )

    for i in range(n_layers_clip):
        add_tensor(i)

    # Duplicate the last block (llava-cli skips over this)
    add_tensor(n_layers_clip - 1, n_layers_clip)
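
    # This duplicated block is why the block count above is n_layers_clip + 1:
    # llava-style CLIP loaders skip the final layer, so repeating SigLIP's last
    # block keeps every real layer in the compute graph.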

    fout.write_header_to_file()
    fout.write_kv_data_to_file()
    fout.write_tensors_to_file()
    fout.close()

    print(f"GGUF written to {fname_out}")

    ### Text model

    # general GGUF init
    fname_out = f"{name}-text-model-f16.gguf"
    fout = gguf.GGUFWriter(fname_out, arch="gemma")
    ftype = 1
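
    # The text decoder is written as a standard `gemma`-architecture GGUF so
    # llama.cpp can load it like any other Gemma model; PaliGemma's decoder
    # follows Gemma's layout, just with a larger vocabulary.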

    block_count = text_config["num_hidden_layers"]

    fout.add_name(name)
    fout.add_context_length(text_config["max_position_embeddings"])
    fout.add_embedding_length(text_config["hidden_size"])
    fout.add_block_count(block_count)
    fout.add_feed_forward_length(text_config["intermediate_size"])
    fout.add_head_count(text_config["num_attention_heads"])
    fout.add_head_count_kv(text_config.get("num_key_value_heads") or text_config["num_attention_heads"])
    fout.add_layer_norm_rms_eps(text_config["rms_norm_eps"])
    fout.add_key_length(text_config["head_dim"])
    fout.add_value_length(text_config["head_dim"])
    fout.add_file_type(ftype)
    # fout.add_add_bos_token(True)

    ### Tokenizer

    # Taken from _set_vocab_sentencepiece
    from enum import IntEnum

    class SentencePieceTokenTypes(IntEnum):
        NORMAL = 1
        UNKNOWN = 2
        CONTROL = 3
        USER_DEFINED = 4
        UNUSED = 5
        BYTE = 6

    from sentencepiece import SentencePieceProcessor

    tokenizer_path = dir_model / 'tokenizer.model'
    if not tokenizer_path.is_file():
        raise FileNotFoundError(f"File not found: {tokenizer_path}")

    tokenizer = SentencePieceProcessor()
    tokenizer.LoadFromFile(str(tokenizer_path))

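    # The GGUF vocab must contain exactly config.json's vocab_size entries.
    # Pre-fill every slot with a [PADn] placeholder, then overwrite slots from
    # the sentencepiece model, added_tokens.json, and tokenizer_config.json.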
    vocab_size = config["vocab_size"]

    tokens: typing.List[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
    scores: typing.List[float] = [-10000.0] * vocab_size
    toktypes: typing.List[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size

    for token_id in range(tokenizer.vocab_size()):
        piece = tokenizer.IdToPiece(token_id)
        text = piece.encode("utf-8")
        score = tokenizer.GetScore(token_id)

        toktype = SentencePieceTokenTypes.NORMAL
        if tokenizer.IsUnknown(token_id):
            toktype = SentencePieceTokenTypes.UNKNOWN
        elif tokenizer.IsControl(token_id):
            toktype = SentencePieceTokenTypes.CONTROL
        elif tokenizer.IsUnused(token_id):
            toktype = SentencePieceTokenTypes.UNUSED
        elif tokenizer.IsByte(token_id):
            toktype = SentencePieceTokenTypes.BYTE

        tokens[token_id] = text
        scores[token_id] = score
        toktypes[token_id] = toktype

    added_tokens_file = dir_model / 'added_tokens.json'
    if added_tokens_file.is_file():
        with open(added_tokens_file, "r", encoding="utf-8") as f:
            added_tokens_json = json.load(f)
            for key in added_tokens_json:
                token_id = added_tokens_json[key]
                if token_id >= vocab_size:
                    print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
                    continue

                tokens[token_id] = key.encode("utf-8")
                scores[token_id] = -1000.0
                toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED

    tokenizer_config_file = dir_model / 'tokenizer_config.json'
    if tokenizer_config_file.is_file():
        with open(tokenizer_config_file, "r", encoding="utf-8") as f:
            tokenizer_config_json = json.load(f)
            added_tokens_decoder = tokenizer_config_json.get("added_tokens_decoder", {})
            for token_id, token_data in added_tokens_decoder.items():
                token_id = int(token_id)
                token: str = token_data["content"]
                if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
                    if tokens[token_id] != token.encode("utf-8"):
                        print(f'replacing token {token_id}: {tokens[token_id].decode("utf-8")!r} -> {token!r}')
                if token_data.get("special") or does_token_look_special(token):
                    toktypes[token_id] = SentencePieceTokenTypes.CONTROL
                else:
                    token = token.replace(b"\xe2\x96\x81".decode("utf-8"), " ")  # pre-normalize user-defined spaces
                    toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED

                scores[token_id] = -1000.0
                tokens[token_id] = token.encode("utf-8")

    if vocab_size > len(tokens):
        pad_count = vocab_size - len(tokens)
        print(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
        for i in range(1, pad_count + 1):
            tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
            scores.append(-1000.0)
            toktypes.append(SentencePieceTokenTypes.UNUSED)

    fout.add_tokenizer_model("llama")
    fout.add_tokenizer_pre("default")
    fout.add_token_list(tokens)
    fout.add_token_scores(scores)
    fout.add_token_types(toktypes)

    special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
    special_vocab.add_to_gguf(fout)
    fout.add_add_space_prefix(False)

    ### Text model tensors

    fout.add_tensor(
        "token_embd.weight",
        tensors.get_tensor("language_model.model.embed_tokens.weight").astype(np.float16),
    )

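    # Gemma's RMSNorm computes x * (1 + weight) (see the modeling_gemma.py link
    # below), while llama.cpp's gemma graph multiplies by the stored weight
    # directly, so every norm weight below is exported with the +1 folded in.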
    for i in range(text_config["num_hidden_layers"]):
        fout.add_tensor(
            f"blk.{i}.attn_norm.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.input_layernorm.weight").astype(
                np.float32
                # https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
            ) + 1,
        )

        fout.add_tensor(
            f"blk.{i}.ffn_down.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.mlp.down_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.ffn_gate.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.mlp.gate_proj.weight").astype(np.float16),
        )

        fout.add_tensor(
            f"blk.{i}.ffn_up.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.mlp.up_proj.weight").astype(np.float16),
        )

        fout.add_tensor(
            f"blk.{i}.ffn_norm.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.post_attention_layernorm.weight").astype(
                np.float32
            ) + 1,
        )

        fout.add_tensor(
            f"blk.{i}.attn_k.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.k_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.attn_output.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.o_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.attn_q.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.q_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.attn_v.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.v_proj.weight").astype(np.float16),
        )

    fout.add_tensor(
        "output_norm.weight",
        tensors.get_tensor("language_model.model.norm.weight").astype(np.float32) + 1,
    )

    # save gguf
    fout.write_header_to_file()
    fout.write_kv_data_to_file()
    fout.write_tensors_to_file()
    fout.close()

    print(f"GGUF written to {fname_out}")
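
# Usage sketch (the path is illustrative): running
#   python paligemma_to_gguf.py -d ./paligemma-3b-mix-448
# produces <dir name>-mmproj-f16.gguf and <dir name>-text-model-f16.gguf,
# intended for llama.cpp's llava-style tooling (which the block-count
# workaround above targets).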