Model not working for CPU
#17 by vivek0797 - opened
Hi @TheBloke, I'm trying to run this model on CPU, but it fails with the following error:
Traceback (most recent call last):
  File "/home/vivek/work/falcon_7b_gptq.py", line 35, in <module>
    output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/vivek/work/venv/lib/python3.11/site-packages/auto_gptq/modeling/_base.py", line 442, in generate
    with torch.inference_mode(), torch.amp.autocast(device_type=self.device.type):
                                                                ^^^^^^^^^^^
  File "/home/vivek/work/venv/lib/python3.11/site-packages/auto_gptq/modeling/_base.py", line 431, in device
    device = [d for d in self.hf_device_map.values() if d not in {'cpu', 'disk'}][0]
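From what I can tell, the failure comes from that last line: when the model is loaded on CPU only, every value in hf_device_map is presumably 'cpu', so the comprehension that drops 'cpu' and 'disk' returns an empty list and indexing it with [0] raises an IndexError. A minimal sketch of what I think is happening (the device-map contents are my assumption, since the traceback is cut off before the exception message):

# Sketch of the expression from auto_gptq/modeling/_base.py line 431,
# assuming a CPU-only device map (actual contents not shown in the traceback)
hf_device_map = {"": "cpu"}  # hypothetical
device = [d for d in hf_device_map.values() if d not in {"cpu", "disk"}][0]  # IndexError: list index out of range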
I'm using the following code:
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
import argparse
model_name_or_path = "TheBloke/falcon-7b-instruct-GPTQ"
model_basename = "model"
use_triton = False
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
                                            model_basename=model_basename,
                                            use_safetensors=True,
                                            trust_remote_code=True,
                                            device="cpu",
                                            use_triton=use_triton,
                                            quantize_config=None)
prompt = "Tell me about AI"
prompt_template=f'''A helpful assistant who helps the user with any questions asked.
User: {prompt}
Assistant:'''
print("\n\n*** Generate:")
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
print(tokenizer.decode(output[0]))
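If it helps, I assume printing the device map attribute referenced in the traceback (the failing property reads self.hf_device_map, so I expect it to be set on the returned model object) would confirm that only 'cpu' entries are present:

print(model.hf_device_map)  # presumably only 'cpu' values when loaded with device="cpu"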