Offloading to CPU not working? #21
by fahadh4ilyas
I got this error when loading this model in text-generation-webui with offloading to CPU enabled:

```
Traceback (most recent call last):
  File "/home/fahadh/text-generation-webui/modules/callbacks.py", line 55, in gentask
    ret = self.mfunc(callback=_callback, *args, **self.kwargs)
  File "/home/fahadh/text-generation-webui/modules/text_generation.py", line 307, in generate_with_callback
    shared.model.generate(**kwargs)
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/auto_gptq/modeling/_base.py", line 438, in generate
    return self.model.generate(**kwargs)
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/transformers/generation/utils.py", line 1633, in generate
    return self.sample(
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/transformers/generation/utils.py", line 2755, in sample
    outputs = self(
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/fahadh/.cache/huggingface/modules/transformers_modules/TheBloke_falcon-40b-instruct-GPTQ/modelling_RW.py", line 759, in forward
    transformer_outputs = self.transformer(
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/fahadh/.cache/huggingface/modules/transformers_modules/TheBloke_falcon-40b-instruct-GPTQ/modelling_RW.py", line 654, in forward
    outputs = block(
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/home/fahadh/.cache/huggingface/modules/transformers_modules/TheBloke_falcon-40b-instruct-GPTQ/modelling_RW.py", line 396, in forward
    attn_outputs = self.self_attention(
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/fahadh/.cache/huggingface/modules/transformers_modules/TheBloke_falcon-40b-instruct-GPTQ/modelling_RW.py", line 252, in forward
    fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/auto_gptq/nn_modules/qlinear/qlinear_cuda_old.py", line 266, in forward
    out = out + self.bias if self.bias is not None else out
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
Exception in thread Thread-3 (gentask):
Traceback (most recent call last):
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/home/fahadh/text-generation-webui/modules/callbacks.py", line 62, in gentask
    clear_torch_cache()
  File "/home/fahadh/text-generation-webui/modules/callbacks.py", line 94, in clear_torch_cache
    torch.cuda.empty_cache()
  File "/home/fahadh/anaconda3/envs/textgen/lib/python3.10/site-packages/torch/cuda/memory.py", line 133, in empty_cache
    torch._C._cuda_emptyCache()
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
```
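The first traceback dies on `out = out + self.bias` in AutoGPTQ's `qlinear_cuda_old.py`, which suggests the activation tensor is on `cuda:0` while that layer's bias was offloaded to CPU. Here is a minimal sketch of what that mismatch looks like; the tensor names are illustrative, not the actual AutoGPTQ internals:

```python
import torch

# Illustrative only: mimics the state the traceback implies, where the
# activation lives on the GPU but an offloaded parameter stayed on the CPU.
out = torch.randn(1, 8, device="cuda:0")  # activation produced by the CUDA kernel
bias = torch.randn(8, device="cpu")       # bias left on the CPU by offloading

try:
    out = out + bias  # elementwise op mixing cuda:0 and cpu tensors
except RuntimeError as e:
    print(e)  # "Expected all tensors to be on the same device, ..."
```

The second traceback (`illegal memory access` inside `torch.cuda.empty_cache()`) looks like downstream fallout from the first failure rather than a separate bug.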
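In case it helps narrow this down, here is a quick diagnostic to confirm which modules actually landed on CPU after dispatch. It assumes the loaded model object is reachable (e.g. as `shared.model` inside text-generation-webui); `model` below stands in for however you reach it:

```python
# Hypothetical diagnostic: list every parameter that ended up on the CPU.
for name, param in model.named_parameters():
    if param.device.type == "cpu":
        print(name, tuple(param.shape))
```

If the attention blocks' `query_key_value` biases show up here while their activations run on the GPU, that matches the device mismatch in the traceback above.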