andrewqian123 committed on
Commit
a0e05d2
1 Parent(s): ad7a067

Update processing_minicpmv.py

Browse files
Files changed (1) hide show
  1. processing_minicpmv.py +8 -0
processing_minicpmv.py CHANGED
@@ -164,6 +164,14 @@ class MiniCPMVProcessor(ProcessorMixin):
164
  return input_ids.unsqueeze(0), image_bounds
165
 
166
  def _convert_images_texts_to_inputs(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
 
 
 
 
 
 
 
 
167
  if not len(images):
168
  model_inputs = self.tokenizer(texts, return_tensors=return_tensors, padding=do_pad, truncation=truncation, max_length=max_length)
169
  return MiniCPMVBatchFeature(data={**model_inputs})
 
164
  return input_ids.unsqueeze(0), image_bounds
165
 
166
  def _convert_images_texts_to_inputs(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
167
+ assert len(images) == len(texts)
168
+ batch = []
169
+ for ind in range(len(images)):
170
+ result = _convert_images_texts_to_inputs2(self, images[ind], texts[ind], do_pad, truncation, max_length, return_tensors)
171
+ batch.append(result)
172
+ return batch
173
+
174
+ def _convert_images_texts_to_inputs2(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
175
  if not len(images):
176
  model_inputs = self.tokenizer(texts, return_tensors=return_tensors, padding=do_pad, truncation=truncation, max_length=max_length)
177
  return MiniCPMVBatchFeature(data={**model_inputs})