# Spaces: Runtime error
# (Page status text captured together with this script; it is not part of the code.)
import os

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

# Inference demo: load a ChatGLM-6B base model, restore a P-Tuning v2
# PrefixEncoder checkpoint on top of it, quantize, and run one chat turn.

# Paths are assembled with os.path.join so that the script resolves the same
# relative locations on Windows (where the result is identical to the former
# hard-coded backslash strings) and on POSIX systems (where a backslash is not
# a path separator).
model_path = os.path.join("..", "models", "chatglm-6b-int4")
CHECKPOINT_PATH = os.path.join(
    ".", "output", "adgen-chatglm-6b-pt-128-2e-2", "checkpoint-100"
)

# Key prefix under which the PrefixEncoder weights are stored in the checkpoint.
PREFIX_ENCODER_KEY = "transformer.prefix_encoder."

# Load the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Loading a new-style checkpoint (one that only contains PrefixEncoder
# parameters): build the base model with pre_seq_len set, then restore the
# prefix weights separately.
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, pre_seq_len=128)
model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)

# map_location="cpu" keeps deserialization independent of the CUDA device the
# checkpoint was saved from; the weights are copied into the model's own
# parameters below and moved to the GPU together with the rest of the model.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
prefix_state_dict = torch.load(
    os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"),
    map_location="cpu",
)

# Keep only the PrefixEncoder entries and strip the key prefix so the names
# match the prefix_encoder sub-module's own state dict.
new_prefix_state_dict = {
    key[len(PREFIX_ENCODER_KEY):]: value
    for key, value in prefix_state_dict.items()
    if key.startswith(PREFIX_ENCODER_KEY)
}
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

# Afterwards the model can be quantized as needed, or used directly.
# NOTE(review): model_path points at an "int4" model directory -- confirm
# whether this explicit quantize() call is still required for it.
kernel_file = os.path.join(model_path, "quantization_kernels.so")
model = model.quantize(bits=4, kernel_file=kernel_file)

# Run the model in half precision on the GPU, but keep the prefix encoder in
# float32.
model = model.half().cuda()
model.transformer.prefix_encoder.float()
model = model.eval()

# Single-turn smoke test with an empty conversation history.
response, history = model.chat(tokenizer, "你好呀", history=[])
print("response:", response)