"""Console chat loop for a P-tuned (prefix-encoder) ChatGLM-6B checkpoint.

Loads the base model and tokenizer, restores the prefix-encoder weights
from a fine-tuning checkpoint, quantizes the model, and then runs an
interactive chat session on the terminal with streamed output.
"""
import os

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

# Load the tokenizer.
model_path = "..\\models\\chatglm-6b-int4"
CHECKPOINT_PATH = '.\\output\\adgen-chatglm-6b-pt-128-2e-2\\checkpoint-1000'

# Upper bound on chat turns per session, and the history length above which
# the oldest entries are dropped.
MAX_TURNS = 3000
MAX_HISTORY_ENTRIES = 5

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Loading a new-style checkpoint (one that contains only the PrefixEncoder
# parameters): build the base model with pre_seq_len set, then restore the
# prefix encoder from the checkpoint.
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, pre_seq_len=128)
model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)

# NOTE: torch.load unpickles the file, so only load checkpoints that come
# from a trusted source.
prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))

# Keep only the prefix-encoder entries and strip their common key prefix so
# the keys match what prefix_encoder.load_state_dict expects.
_PREFIX = "transformer.prefix_encoder."
new_prefix_state_dict = {
    key[len(_PREFIX):]: value
    for key, value in prefix_state_dict.items()
    if key.startswith(_PREFIX)
}
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

# Afterwards the model can be quantized as needed, or used directly.
kernel_file = os.path.join(model_path, "quantization_kernels.so")
model = model.quantize(bits=4, kernel_file=kernel_file)
model = model.half().cuda()
# The prefix encoder is switched back to float32 after the half() cast.
model.transformer.prefix_encoder.float()
model = model.eval()


def parse_text(text):
    """Reformat model/user text for display.

    Empty lines are dropped. Every line containing a ``` fence is replaced
    by a bare newline and toggles the "inside code block" state. Every other
    line except the first is prefixed with a newline; while inside a code
    block its backticks are additionally escaped with a backslash.

    NOTE(review): in the original, the string literals used here were split
    across physical lines (a syntax error) and were followed by a chain of
    ``replace`` calls that mapped characters to themselves. The literals are
    written as explicit "\\n" and the no-op replacements are dropped --
    confirm this matches the intended output markup.

    Args:
        text: The raw text to reformat.

    Returns:
        The reformatted text as a single string.
    """
    lines = [line for line in text.split("\n") if line != ""]
    fence_count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            fence_count += 1
            lines[i] = "\n"
        elif i > 0:
            # An odd number of fences seen so far means we are inside a code block.
            if fence_count % 2 == 1:
                line = line.replace("`", "\\`")
            lines[i] = "\n" + line
    return "".join(lines)


def predict(input, chatbot, max_length, top_p, temperature, history):  # noqa: A002
    """Stream a reply from the model, yielding the updated chat state.

    The parameter name ``input`` shadows the builtin but is kept because
    callers pass it by keyword.

    Args:
        input: The user's message.
        chatbot: List of (user_text, reply_text) pairs; a new pair is
            appended and its reply is updated in place as chunks arrive.
        max_length: Forwarded to ``model.stream_chat``.
        top_p: Forwarded to ``model.stream_chat``.
        temperature: Forwarded to ``model.stream_chat``.
        history: Conversation history forwarded to ``model.stream_chat``.

    Yields:
        Tuples of (chatbot, history) after each streamed update.
    """
    # The formatted user text does not change while streaming; compute it once.
    shown_input = parse_text(input)
    chatbot.append((shown_input, ""))
    for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length,
                                               top_p=top_p, temperature=temperature):
        chatbot[-1] = (shown_input, parse_text(response))
        yield chatbot, history


def _unprinted_suffix(previous, current):
    """Return the part of ``current`` after its common prefix with ``previous``."""
    # os.path.commonprefix compares character by character, so it works on
    # arbitrary strings, not just paths.
    common = os.path.commonprefix([previous, current])
    return current[len(common):]


def main():
    """Run the interactive chat loop on the terminal."""
    history = []
    for _ in range(MAX_TURNS):
        # If the conversation has grown too long, forget the oldest entries.
        if len(history) > MAX_HISTORY_ENTRIES:
            del history[:2]

        print('\033[1;31m{}\033[0m'.format('\nYou:'), end='')
        try:
            msg = input()
        except (EOFError, KeyboardInterrupt):
            # End the session cleanly on Ctrl-D / Ctrl-C at the prompt.
            print()
            break

        print('\033[1;34m{}\033[0m'.format('ChatGLM:'), end='')
        # Track what has been printed for THIS turn only, so text from the
        # previous answer can never be subtracted from the new one.
        printed = ''
        for chatbot, history in predict(input=msg, chatbot=[], max_length=10000, top_p=0.5,
                                        temperature=0.5, history=history):
            current = chatbot[-1][1]
            # Print only the newly generated tail; flush so it appears immediately.
            print(_unprinted_suffix(printed, current), end='', flush=True)
            printed = current


if __name__ == "__main__":
    main()