How to fix the error: RuntimeError: probability tensor contains either `inf`, `nan` or element < 0

#13 opened by lele12306

RuntimeError Traceback (most recent call last)
Cell In[2], line 16
13 question = 'What is in the image?'
14 msgs = [{'role': 'user', 'content': [image, question]}]
---> 16 res = model.chat(
17 image=None,
18 msgs=msgs,
19 tokenizer=tokenizer
20 )
21 print(res)
23 ## if you want to use streaming, please make sure sampling=True and stream=True
24 ## the model.chat will return a generator

File ~/.cache/huggingface/modules/transformers_modules/MiniCPM-V-2_6/modeling_minicpmv.py:380, in MiniCPMV.chat(self, image, msgs, tokenizer, processor, vision_hidden_states, max_new_tokens, min_new_tokens, sampling, max_inp_length, system_prompt, stream, max_slice_nums, use_image_id, **kwargs)
378 inputs.pop("image_sizes")
379 with torch.inference_mode():
--> 380 res = self.generate(
381 **inputs,
382 tokenizer=tokenizer,
383 max_new_tokens=max_new_tokens,
384 vision_hidden_states=vision_hidden_states,
385 stream=stream,
386 decode_text=True,
387 **generation_config
388 )
390 if stream:
391 def stream_gen():

File ~/.cache/huggingface/modules/transformers_modules/MiniCPM-V-2_6/modeling_minicpmv.py:261, in MiniCPMV.generate(self, input_ids, pixel_values, tgt_sizes, image_bound, attention_mask, tokenizer, vision_hidden_states, return_vision_hidden_states, stream, decode_text, **kwargs)
259 result = self._decode_stream(model_inputs["inputs_embeds"], tokenizer, **kwargs)
260 else:
--> 261 result = self._decode(model_inputs["inputs_embeds"], tokenizer, attention_mask, decode_text=decode_text, **kwargs)
263 if return_vision_hidden_states:
264 return result, vision_hidden_states

File ~/.cache/huggingface/modules/transformers_modules/MiniCPM-V-2_6/modeling_minicpmv.py:185, in MiniCPMV._decode(self, inputs_embeds, tokenizer, attention_mask, decode_text, **kwargs)
183 def _decode(self, inputs_embeds, tokenizer, attention_mask, decode_text=False, **kwargs):
184 terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
--> 185 output = self.llm.generate(
186 inputs_embeds=inputs_embeds,
187 pad_token_id=0,
188 eos_token_id=terminators,
189 attention_mask=attention_mask,
190 **kwargs
191 )
192 if decode_text:
193 return self._decode_text(output, tokenizer)

File ~/.conda/envs/jupyter-public/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)

File ~/.conda/envs/jupyter-public/lib/python3.10/site-packages/transformers/generation/utils.py:1622, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1614 input_ids, model_kwargs = self._expand_inputs_for_generation(
1615 input_ids=input_ids,
1616 expand_size=generation_config.num_return_sequences,
1617 is_encoder_decoder=self.config.is_encoder_decoder,
1618 **model_kwargs,
1619 )
1621 # 13. run sample
-> 1622 result = self._sample(
1623 input_ids,
1624 logits_processor=prepared_logits_processor,
1625 logits_warper=logits_warper,
1626 stopping_criteria=prepared_stopping_criteria,
1627 pad_token_id=generation_config.pad_token_id,
1628 output_scores=generation_config.output_scores,
1629 output_logits=generation_config.output_logits,
1630 return_dict_in_generate=generation_config.return_dict_in_generate,
1631 synced_gpus=synced_gpus,
1632 streamer=streamer,
1633 **model_kwargs,
1634 )
1636 elif generation_mode == GenerationMode.BEAM_SEARCH:
1637 # 11. prepare beam search scorer
1638 beam_scorer = BeamSearchScorer(
1639 batch_size=batch_size,
1640 num_beams=generation_config.num_beams,
(...)
1645 max_length=generation_config.max_length,
1646 )

File ~/.conda/envs/jupyter-public/lib/python3.10/site-packages/transformers/generation/utils.py:2829, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, output_logits, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2827 # sample
2828 probs = nn.functional.softmax(next_token_scores, dim=-1)
-> 2829 next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
2831 # finished sentences should have their next token be a padding token
2832 if eos_token_id is not None:

RuntimeError: probability tensor contains either `inf`, `nan` or element < 0
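For context, the failing line is the final sampling step in transformers (torch.multinomial over the softmaxed logits), which rejects any probability row containing inf or NaN. A tiny standalone sketch, with no model involved, that triggers the same RuntimeError:

import torch

# Logits that overflow to inf make softmax return NaN probabilities...
logits = torch.tensor([float('inf'), 0.0])
probs = torch.softmax(logits, dim=-1)   # tensor([nan, nan])

# ...and torch.multinomial then raises the same error as in the traceback above.
torch.multinomial(probs, num_samples=1)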

OpenBMB org

Hi, have you tried passing the image as a string containing its file path?
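To spell out the check being suggested (the path below is hypothetical; the model card's example passes a decoded PIL Image inside msgs, not a path string):

from PIL import Image

# Was the image accidentally left as a path string like this?
image = '/path/to/image.jpg'  # hypothetical path

# The example code passes an actual PIL Image instead:
image = Image.open('/path/to/image.jpg').convert('RGB')
msgs = [{'role': 'user', 'content': [image, 'What is in the image?']}]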

I got the same error.
I was only trying to run the example code given for the quantized version.

test.py

import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True, attn_implementation='eager')
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
model.eval()

image = Image.open('/kaggle/input/exam-marksheets/IMG_3451.jpg').convert('RGB')
question = 'What is the final marks?'
msgs = [{'role': 'user', 'content': [image, question]}]

res = model.chat(
    image=None,
    msgs=msgs,
    tokenizer=tokenizer
)
print(res)

Maybe it is caused by NaN values in next_token_logits in the LLM's output.
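One quick way to test that hypothesis is to scan the loaded weights for non-finite values, since NaN/inf weights (a common result of fp16 overflow) propagate straight into the logits. A rough sketch, assuming model is the checkpoint loaded as above:

import torch

# Report any floating-point parameter containing NaN or inf.
bad = [name for name, p in model.named_parameters()
       if p.is_floating_point() and not torch.isfinite(p).all()]
print('non-finite parameters:', bad or 'none')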

I faced a similar error, and it was fixed by adding model.bfloat16() to my code right after loading the model.
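For reference, a minimal sketch of that fix, assuming the non-quantized checkpoint and a GPU with bfloat16 support (e.g. Ampere or newer); you can either cast after loading, as described above, or request the dtype at load time:

import torch
from transformers import AutoModel

# Option 1: cast to bfloat16 after loading.
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-2_6',
    trust_remote_code=True,
    attn_implementation='eager',
)
model = model.bfloat16().eval().cuda()

# Option 2: load directly in bfloat16, avoiding an fp16 intermediate.
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-2_6',
    trust_remote_code=True,
    attn_implementation='eager',
    torch_dtype=torch.bfloat16,
).eval().cuda()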
