wizcat/internlm-chat-7b-v1_1-gptq

internlm-chat-7b-v1_1 を GPTQ で量子化したモデルです。
利用に当たっては、元モデル https://huggingface.co/internlm/internlm-chat-7b-v1_1 のライセンスに従ってください。

推論用コード

import torch
import time
from transformers import AutoTokenizer, AutoModelForCausalLM,GPTQConfig

# Directory holding the GPTQ-quantized checkpoint, relative to the current
# working directory. A forward-slash path resolves on Windows as well as on
# Linux/macOS; a ".\" prefix is only understood as a path on Windows.
model_path = "./internlm-chat-7b-v1_1-gptq"

# trust_remote_code=True is passed because the checkpoint is loaded through
# code shipped alongside it (the chat() helper used later is not a stock
# transformers method) -- only enable it for checkpoints you trust.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# 4-bit GPTQ settings with the exllama kernels disabled.
# NOTE(review): newer transformers releases spell this `use_exllama=False`;
# confirm against the installed version before changing it.
gptq_config = GPTQConfig(bits=4, disable_exllama=True)

# device_map="auto" leaves device placement of the weights to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=gptq_config,
    trust_remote_code=True,
)
# Switch to evaluation mode before running inference.
model = model.eval()

# Conversation state: every model.chat() call receives the current history
# and returns the updated one, which is carried into the next turn.
history = []

# Simple interactive loop: read a message, generate a reply, report timing.
while True:
    try:
        txt = input("msg:")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-Z (end of input) or Ctrl-C at the prompt ends the
        # session cleanly instead of dumping a traceback.
        print()
        break
    start_time = time.perf_counter()
    response, history = model.chat(tokenizer, txt, history=history)
    # Stop the clock before printing so the figure covers generation only.
    elapsed_time = time.perf_counter() - start_time
    print(response)
    print(f"worktime:{elapsed_time}")