CUDA error: CUBLAS_STATUS_EXECUTION_FAILED
Exception in callback _raise_exception_on_finish(<Future finis...scaleType 0')>) at /usr/local/lib/python3.10/dist-packages/lmdeploy/vl/engine.py:20
handle: <Handle _raise_exception_on_finish(<Future finis...scaleType 0')>) at /usr/local/lib/python3.10/dist-packages/lmdeploy/vl/engine.py:20>
Traceback (most recent call last):
File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/usr/local/lib/python3.10/dist-packages/lmdeploy/vl/engine.py", line 27, in _raise_exception_on_finish
raise e
File "/usr/local/lib/python3.10/dist-packages/lmdeploy/vl/engine.py", line 23, in _raise_exception_on_finish
task.result()
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.10/dist-packages/lmdeploy/vl/engine.py", line 169, in forward
outputs = self.model.forward(*func_inputs)
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/lmdeploy/vl/model/qwen2.py", line 102, in forward
image_embeds = self.model.visual(pixel_values,
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1074, in forward
hidden_states = blk(hidden_states, cu_seqlens=cu_seqlens, rotary_pos_emb=rotary_pos_emb)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 434, in forward
hidden_states = hidden_states + self.mlp(self.norm2(hidden_states))
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 322, in forward
return self.fc2(self.act(self.fc1(x)))
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasLtMatmul with transpose_mat1 1 transpose_mat2 0 m 5120 n 1296 k 1280 mat1_ld 1280 mat2_ld 1280 result_ld 5120 abcType 2 computeType 68 scaleType 0