czczup committed on
Commit
dc39329
1 Parent(s): 746114f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_internvl_chat.py +6 -5
modeling_internvl_chat.py CHANGED
@@ -33,6 +33,7 @@ def version_cmp(v1, v2, op='eq'):
33
  class InternVLChatModel(PreTrainedModel):
34
  config_class = InternVLChatConfig
35
  main_input_name = 'pixel_values'
 
36
  _supports_flash_attn_2 = True
37
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
38
 
@@ -97,7 +98,7 @@ class InternVLChatModel(PreTrainedModel):
97
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
98
 
99
  image_flags = image_flags.squeeze(-1)
100
- input_embeds = self.language_model.get_input_embeddings()(input_ids)
101
 
102
  vit_embeds = self.extract_feature(pixel_values)
103
  vit_embeds = vit_embeds[image_flags == 1]
@@ -230,8 +231,8 @@ class InternVLChatModel(PreTrainedModel):
230
 
231
  tokenizer.padding_side = 'left'
232
  model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
233
- input_ids = model_inputs['input_ids'].cuda()
234
- attention_mask = model_inputs['attention_mask'].cuda()
235
  eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
236
  generation_config['eos_token_id'] = eos_token_id
237
  generation_output = self.generate(
@@ -279,8 +280,8 @@ class InternVLChatModel(PreTrainedModel):
279
  query = query.replace('<image>', image_tokens, 1)
280
 
281
  model_inputs = tokenizer(query, return_tensors='pt')
282
- input_ids = model_inputs['input_ids'].cuda()
283
- attention_mask = model_inputs['attention_mask'].cuda()
284
  generation_config['eos_token_id'] = eos_token_id
285
  generation_output = self.generate(
286
  pixel_values=pixel_values,
 
33
  class InternVLChatModel(PreTrainedModel):
34
  config_class = InternVLChatConfig
35
  main_input_name = 'pixel_values'
36
+ base_model_prefix = 'language_model'
37
  _supports_flash_attn_2 = True
38
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
39
 
 
98
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
99
 
100
  image_flags = image_flags.squeeze(-1)
101
+ input_embeds = self.language_model.get_input_embeddings()(input_ids).clone()
102
 
103
  vit_embeds = self.extract_feature(pixel_values)
104
  vit_embeds = vit_embeds[image_flags == 1]
 
231
 
232
  tokenizer.padding_side = 'left'
233
  model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
234
+ input_ids = model_inputs['input_ids'].to(self.device)
235
+ attention_mask = model_inputs['attention_mask'].to(self.device)
236
  eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
237
  generation_config['eos_token_id'] = eos_token_id
238
  generation_output = self.generate(
 
280
  query = query.replace('<image>', image_tokens, 1)
281
 
282
  model_inputs = tokenizer(query, return_tensors='pt')
283
+ input_ids = model_inputs['input_ids'].to(self.device)
284
+ attention_mask = model_inputs['attention_mask'].to(self.device)
285
  generation_config['eos_token_id'] = eos_token_id
286
  generation_output = self.generate(
287
  pixel_values=pixel_values,