"""Load a local ChatGLM-style checkpoint from the current directory and
re-save the tokenizer and model (as float32) into the ``models`` directory.

Expects the checkpoint files (config, weights, tokenizer files, and the
custom modeling code enabled by ``trust_remote_code=True``) to be in the
working directory.  NOTE(review): ``trust_remote_code=True`` executes
arbitrary Python shipped with the checkpoint — only run on trusted files.
"""
from transformers import AutoTokenizer, AutoModel

# Optional custom CPU quantization kernel; only needed if quantizing below.
# "/" works on Windows too, so this stays portable.
kernel_file = "models/quantization_kernels.so"

# "." is the portable equivalent of the original Windows-only ".\\" path.
tokenizer = AutoTokenizer.from_pretrained(".", trust_remote_code=True)
# .float() forces float32 weights so the saved model runs on CPU.
model = AutoModel.from_pretrained(".", trust_remote_code=True).float()

# To produce a 4-bit quantized model instead, uncomment:
#   model = model.quantize(bits=4, kernel_file=kernel_file)
# Quick smoke test of the loaded model (Chinese "hello"):
#   response, history = model.chat(tokenizer, "你好", history=[])
#   print("response:", response)

# Write the converted tokenizer and model side by side under models/.
tokenizer.save_pretrained("models")
model.save_pretrained("models")