The int4 version was working fine until recently, but now it no longer runs. Was there a version change? What's going on?
What error are you getting?
Traceback (most recent call last):
  File "D:\python310\lib\site-packages\gradio\routes.py", line 414, in run_predict
    output = await app.get_blocks().process_api(
  File "D:\python310\lib\site-packages\gradio\blocks.py", line 1323, in process_api
    result = await self.call_function(
  File "D:\python310\lib\site-packages\gradio\blocks.py", line 1067, in call_function
    prediction = await utils.async_iteration(iterator)
  File "D:\python310\lib\site-packages\gradio\utils.py", line 339, in async_iteration
    return await iterator.__anext__()
  File "D:\python310\lib\site-packages\gradio\utils.py", line 332, in __anext__
    return await anyio.to_thread.run_sync(
  File "D:\python310\lib\site-packages\anyio\to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "D:\python310\lib\site-packages\anyio\_backends\_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "D:\python310\lib\site-packages\anyio\_backends\_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "D:\python310\lib\site-packages\gradio\utils.py", line 315, in run_sync_iterator_async
    return next(iterator)
  File "D:\ChatGLM2-6B-main\web_demo - int4.py", line 71, in predict
    for response, history, past_key_values in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
  File "D:\python310\lib\site-packages\torch\utils\_contextlib.py", line 35, in generator_context
    response = gen.send(None)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 1057, in stream_chat
    for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
  File "D:\python310\lib\site-packages\torch\utils\_contextlib.py", line 35, in generator_context
    response = gen.send(None)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 1142, in stream_generate
    outputs = self(
  File "D:\python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 931, in forward
    transformer_outputs = self.transformer(
  File "D:\python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 827, in forward
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
  File "D:\python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 637, in forward
    layer_ret = layer(
  File "D:\python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 541, in forward
    attention_output, kv_cache = self.self_attention(
  File "D:\python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\modeling_chatglm.py", line 373, in forward
    mixed_x_layer = self.query_key_value(hidden_states)
  File "D:\python310\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\quantization.py", line 322, in forward
    output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
  File "D:\python310\lib\site-packages\torch\autograd\function.py", line 506, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\quantization.py", line 54, in forward
    weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
  File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\int4\quantization.py", line 267, in extract_weight_to_half
    kernels.int4WeightExtractionHalf if scale_list.dtype == torch.half else kernels.int4WeightExtractionBFloat16
AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
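For what it's worth, this AttributeError usually means that `kernels` inside quantization.py is `None`: if I remember the code correctly, the quantized model imports its int4 CUDA kernels from the cpm_kernels package inside a try/except and falls back to `kernels = None` (with only a logged warning) when that import fails, so the real failure only surfaces later in `extract_weight_to_half`. A minimal sanity check, assuming the standard cpm_kernels package from PyPI:

```python
# Quick check: if this import fails, quantization.py falls back to
# kernels = None, which later raises exactly the AttributeError above.
# (Assumes the standard cpm_kernels package: pip install cpm_kernels)
try:
    import cpm_kernels
    print("cpm_kernels loaded from:", cpm_kernels.__file__)
except Exception as e:
    print("cpm_kernels failed to load:", e)
```

If the import fails here, reinstalling the package (`pip install -U cpm_kernels`) in the same Python environment that runs the demo is the usual first thing to try.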